/*
 * This file is part of Jstacs.
 *
 * Jstacs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Jstacs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jstacs.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * For more information on Jstacs, visit http://www.jstacs.de
 */

package de.jstacs.io;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/**
 * This class implements a reader that extracts strings from either a file or a string. Lines whose first
 * non-whitespace character is a given comment character are ignored. If the user does not specify this
 * character, it is set to &quot;#&quot; internally. If the user specifies this character as &quot;&gt;&quot;,
 * the file or String is treated as FastA-formatted, i.e. lines beginning with &gt; are stripped and the lines
 * between two &gt; (or until the end of the input) are concatenated to form one String.
 * <p>
 * Empty lines are always skipped. Lines are separated by <code>\n</code>, <code>\r</code> or <code>\r\n</code>
 * for both file and String input.
 * 
 * @author Jan Grau, Jens Keilwagen
 */
public class StringExtractor
{
	/** The annotation of the source (file name or user-supplied text). */
	private String annotation;

	/** The extracted strings; only the first <code>last</code> entries are valid. */
	private String[] strs;

	/** The number of extracted strings, i.e. the next free index in <code>strs</code>. */
	private int last;

	/** Recognizes a comment line: optional leading whitespace followed by the comment character. */
	private Pattern ignorePattern;
	
	/** The comment character. */
	private char ignore;
	
	/** Collects the lines of the FastA record that is currently read; unused in non-FastA mode. */
	private StringBuilder current;

	/**
	 * The usual comment-character is #. Lines beginning with this sign will be ignored.
	 */
	public static char USUALLY = '#';

	/**
	 * The comment-character for FastA-formatted files is '&gt;'. If '&gt;' is specified as the comment-character,
	 * the file or String will be interpreted as in FastA format.
	 */
	public static char FASTA = '>';

	/**
	 * Initializes the internal storage and the comment pattern; does not read anything.
	 * 
	 * @param initSize
	 *            the initial number of strings that can be handled
	 * @param ignore
	 *            the first character of lines that should be treated as comments
	 * 
	 * @throws IllegalArgumentException
	 *             if <code>initSize</code> is smaller than 1
	 */
	private StringExtractor( int initSize, char ignore ) throws IllegalArgumentException
	{
		if( initSize < 1 )
		{
			throw new IllegalArgumentException( "The initSize is too small." );
		}
		last = 0;
		strs = new String[initSize];
		this.ignore = ignore;
		// The comment character is quoted, so that regex metacharacters (e.g. '.', '*', '(') are taken literally.
		// Only the prefix is described here; it is tested with lookingAt(), so the remainder of the line
		// (including a stray carriage return) is irrelevant.
		this.ignorePattern = Pattern.compile( "\\s*" + Pattern.quote( String.valueOf( ignore ) ) );
	}

	/**
	 * A constructor that reads the lines from <code>file</code> and ignores those starting with
	 * {@link #USUALLY}.
	 * 
	 * @param file
	 *            the file to be read from
	 * @param initSize
	 *            the initial number of lines that can be handled
	 * 
	 * @throws IOException
	 *             is thrown if the file could not be read
	 * @throws FileNotFoundException
	 *             is thrown if the file could not be found
	 */
	public StringExtractor( File file, int initSize ) throws IOException, FileNotFoundException
	{
		this( file, initSize, USUALLY );
	}

	/**
	 * A constructor that reads the lines from <code>file</code> and ignores those starting with <code>ignore</code>.
	 * The annotation is set to the name of the file.
	 * 
	 * @param file
	 *            the file to be read from
	 * @param initSize
	 *            the initial number of lines that can be handled
	 * @param ignore
	 *            the first character of lines that should be treated as comments
	 * 
	 * @throws IOException
	 *             is thrown if the file could not be read
	 * @throws FileNotFoundException
	 *             is thrown if the file could not be found
	 */
	public StringExtractor( File file, int initSize, char ignore ) throws IOException, FileNotFoundException
	{
		this( initSize, ignore );
		BufferedReader reader = new java.io.BufferedReader( new java.io.FileReader( file ) );
		try
		{
			String line;
			while( (line = reader.readLine()) != null )
			{
				insert( line );
			}
			// the last FastA record is not terminated by a further header line
			flush();
		}
		finally
		{
			// release the file handle even if reading fails
			reader.close();
		}
		annotation = file.getName();
	}

	/**
	 * A constructor that reads the lines from <code>file</code> and ignores those starting with
	 * {@link #USUALLY}.
	 * 
	 * @param file
	 *            the file to be read from
	 * @param initSize
	 *            the initial number of lines that can be handled
	 * @param annotation
	 *            the annotation for the source
	 * 
	 * @throws IOException
	 *             is thrown if the file could not be read
	 * @throws FileNotFoundException
	 *             is thrown if the file could not be found
	 */
	public StringExtractor( File file, int initSize, String annotation ) throws IOException, FileNotFoundException
	{
		this( file, initSize );
		this.annotation = annotation;
	}

	/**
	 * A constructor that reads the lines from <code>file</code> and ignores those starting with <code>ignore</code>.
	 * 
	 * @param file
	 *            the file to be read from
	 * @param initSize
	 *            the initial number of lines that can be handled
	 * @param ignore
	 *            the first character of lines that should be treated as comments
	 * @param annotation
	 *            the annotation for the source
	 * 
	 * @throws IOException
	 *             is thrown if the file could not be read
	 * @throws FileNotFoundException
	 *             is thrown if the file could not be found
	 */
	public StringExtractor( File file, int initSize, char ignore, String annotation ) throws IOException,
			FileNotFoundException
	{
		this( file, initSize, ignore );
		this.annotation = annotation;
	}

	/**
	 * A constructor that reads the lines from a String <code>content</code> and ignores those starting with
	 * {@link #USUALLY}.
	 * 
	 * @param content
	 *            the complete String with all lines
	 * @param initSize
	 *            the initial number of lines that can be handled
	 * @param annotation
	 *            some annotation for the content
	 */
	public StringExtractor( String content, int initSize, String annotation )
	{
		this( content, initSize, USUALLY, annotation );
	}

	/**
	 * A constructor that reads the lines from a String <code>content</code> and ignores those starting with <code>ignore</code>
	 * 
	 * @param content
	 *            the complete String with all lines
	 * @param initSize
	 *            the initial number of lines that can be handled
	 * @param ignore
	 *            the first character of lines that should be treated as comments
	 * @param annotation
	 *            some annotation for the content
	 */
	public StringExtractor( String content, int initSize, char ignore, String annotation )
	{
		this( initSize, ignore );
		// Split on both line-terminator characters, so that "\r\n"- or "\r"-terminated content yields the same
		// lines as reading a file does; the empty tokens between "\r" and "\n" are dropped by the tokenizer.
		StringTokenizer tok = new StringTokenizer( content, "\r\n" );
		while( tok.hasMoreTokens() )
		{
			insert( tok.nextToken() );
		}
		// the last FastA record is not terminated by a further header line
		flush();
		this.annotation = annotation;
	}

	/**
	 * Doubles the capacity of the internal array while keeping all stored strings.
	 */
	private void expand()
	{
		String[] temp = new String[2 * strs.length];
		System.arraycopy( strs, 0, temp, 0, last );
		strs = temp;
	}

	/**
	 * Stores <code>str</code> as the next extracted string and grows the internal array as soon as it is full,
	 * so that there is always room for at least one further string.
	 * 
	 * @param str
	 *            the string to be stored
	 */
	private void add( String str )
	{
		strs[last++] = str;
		if( last == strs.length )
		{
			expand();
		}
	}

	/**
	 * Stores the FastA record collected so far (if it is not empty) and resets the collector.
	 * Does nothing in non-FastA mode, since nothing is ever collected there.
	 */
	private void flush()
	{
		if( current != null && current.length() > 0 )
		{
			add( current.toString() );
			current.setLength( 0 );
		}
	}

	/**
	 * Returns the string with index <code>i</code>.
	 * 
	 * @param i
	 *            the index
	 * 
	 * @return the string with index <code>i</code>
	 * 
	 * @throws IndexOutOfBoundsException
	 *             if <code>i</code> is negative or not smaller than {@link #getNumberOfStrings()}
	 */
	public String getString( int i )
	{
		if( i < 0 || i >= last )
		{
			throw new IndexOutOfBoundsException( "Index " + i + " is out of range; number of strings: " + last );
		}
		return strs[i];
	}

	/**
	 * Returns the number of strings
	 * 
	 * @return the number of strings
	 */
	public int getNumberOfStrings()
	{
		return last;
	}

	/**
	 * Processes one line of the input. Empty lines are skipped. In non-FastA mode each non-comment line becomes
	 * one string. In FastA mode non-comment lines are appended to the current record, and a comment (header)
	 * line terminates the current record.
	 * 
	 * @param str
	 *            the line without line terminator
	 */
	private void insert( String str )
	{
		if( str.length() == 0 )
		{
			return;
		}
		boolean isComment = ignorePattern.matcher( str ).lookingAt();
		if( ignore != FASTA )
		{
			if( !isComment )
			{
				add( str );
			}
		}
		else
		{
			if( current == null )
			{
				current = new StringBuilder();
			}
			if( isComment )
			{
				// a header line starts a new record, hence the previous one is complete
				flush();
			}
			else
			{
				current.append( str );
			}
		}
	}

	/**
	 * Returns the annotation of the source.
	 * 
	 * @return the annotation
	 */
	public String getAnnotation()
	{
		return annotation;
	}
}
