/*
 * Decompiled with CFR 0.152.
 */
package projects.talGA;

import de.jstacs.data.AlphabetContainer;
import de.jstacs.data.DataSet;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.alphabets.DiscreteAlphabet;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.SimpleDiscreteSequence;
import de.jstacs.data.sequences.SparseSequence;
import de.jstacs.io.FileManager;
import de.jstacs.io.XMLParser;
import de.jstacs.optimization.geneticAlgorithms.GeneticAlgorithm2;
import de.jstacs.optimization.geneticAlgorithms.fitnessFunctions.CachingFitnessFunction;
import de.jstacs.optimization.geneticAlgorithms.fitnessFunctions.CombinedFitnessFunction;
import de.jstacs.optimization.geneticAlgorithms.fitnessFunctions.FitnessFunction;
import de.jstacs.optimization.geneticAlgorithms.fitnessFunctions.MultiThreadedFitnessFunction;
import de.jstacs.optimization.geneticAlgorithms.initialization.FixedPopulationInitializer;
import de.jstacs.optimization.geneticAlgorithms.operations.Operation;
import de.jstacs.optimization.geneticAlgorithms.operations.SimpleCrossover;
import de.jstacs.optimization.geneticAlgorithms.operations.SimpleShift;
import de.jstacs.optimization.geneticAlgorithms.operations.SymbolDependentMutation;
import de.jstacs.optimization.geneticAlgorithms.populations.DiscreteSequencePopulation;
import de.jstacs.optimization.geneticAlgorithms.populations.Population;
import de.jstacs.optimization.geneticAlgorithms.populations.individuals.DiscreteSequenceIndividual;
import de.jstacs.optimization.geneticAlgorithms.selection.RandomByFitnessSelection;
import de.jstacs.utils.Normalisation;
import de.jstacs.utils.RealTime;
import de.jstacs.utils.SafeOutputStream;
import de.jstacs.utils.Time;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import projects.talGA.GlobalTALENFitnessFunction;
import projects.talGA.RVDDistanceFitnessFunction;
import projects.talGA.RVDFitnessFunction;
import projects.talGA.TALENIndividual;
import projects.talGA.TALENMatchFitnessFunction;
import projects.talGA.TALENOffTargetFitnessFunction;
import projects.talGA.TALENPopulationInitializer;
import projects.talGA.TALENTargetFinder;
import projects.talGA.TALENWrapperFitnessFunction;
import projects.tals.ScanForTBSWeb;
import projects.tals.TALgetterDiffSM;

/**
 * Command-line driver that runs a genetic algorithm over RVD sequences
 * ({@link DiscreteSequenceIndividual}s) scored by a combination of TALEN-related
 * fitness functions, and prints the resulting population together with its fitness values.
 *
 * <p>Besides {@link #main(String[])} the class offers helpers for building a seed population
 * from the positive sequences ({@link #getSimplePopulation}) and for reading sequence files
 * while cutting out known target sequences ({@link #readFile}, {@link #addFilteredfilter}).
 */
public class TALENDesigner {

    /** Number of command-line arguments read by {@link #main(String[])}. */
    private static final int EXPECTED_ARGS = 9;

    /** Usage line reported when too few arguments are supplied. */
    private static final String USAGE = "Usage: TALENDesigner <positives> <negatives|null> <background> <repetitions> <length> <homo> <minDist> <maxDist> <numThreads>";

    /** Matches maximal runs of the four unambiguous nucleotide letters. */
    private static final Pattern ACGT = Pattern.compile("[ACGT]+", Pattern.CASE_INSENSITIVE);

    /**
     * Runs the complete design procedure.
     *
     * <p>Arguments, by position:
     * <ol start="0">
     * <li>path of the positive (target) sequence file</li>
     * <li>path of the negative sequence file, or the literal string {@code null} for none</li>
     * <li>path of the background sequence file (positives are cut out of it while reading)</li>
     * <li>number of independent genetic-algorithm repetitions</li>
     * <li>length passed to the target finders and used as window length for the seed population</li>
     * <li>{@code true}/{@code false}: the "homo" flag handed to the individuals and fitness wrappers</li>
     * <li>minimum distance ({@code minDist})</li>
     * <li>maximum distance ({@code maxDist})</li>
     * <li>number of threads used by the multi-threaded fitness function</li>
     * </ol>
     *
     * @param args the command-line arguments described above
     * @throws IllegalArgumentException if fewer than nine arguments are given
     * @throws Exception if the model cannot be loaded, a file cannot be read, or the optimization fails
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException further down.
        if (args == null || args.length < EXPECTED_ARGS) {
            throw new IllegalArgumentException(USAGE + " (got " + (args == null ? 0 : args.length) + " argument(s))");
        }

        TALgetterDiffSM talFunction = (TALgetterDiffSM)XMLParser.extractObjectForTags(FileManager.readInputStream(ScanForTBSWeb.class.getClassLoader().getResourceAsStream("projects/tals/talfinder_obg2_hyp_bg.xml")), "model");
        System.out.println(talFunction);
        talFunction.fix();
        DiscreteAlphabet rvdAlph = (DiscreteAlphabet)talFunction.getRVDAlphabet().getAlphabetAt(0);

        int repetitions = Integer.parseInt(args[3]);
        int length = Integer.parseInt(args[4]);
        boolean homo = Boolean.parseBoolean(args[5]);
        int minDist = Integer.parseInt(args[6]);
        int maxDist = Integer.parseInt(args[7]);
        int numThreads = Integer.parseInt(args[8]);
        boolean thresholdByBestScore = true;

        // Codes of the RVDs treated as "strong" and of the RVDs the search is allowed to use.
        int[] strong = new int[]{rvdAlph.getCode("NN"), rvdAlph.getCode("NH"), rvdAlph.getCode("HD"), rvdAlph.getCode("HN")};
        int[] allowed = new int[]{rvdAlph.getCode("NI"), rvdAlph.getCode("HD"), rvdAlph.getCode("NG"), rvdAlph.getCode("NN"), rvdAlph.getCode("NH"), rvdAlph.getCode("NK")};
        int initPopSize = 100;
        int finalPopSize = 100;

        // Negatives and background are read with the positives cut out of them.
        DataSet positives = TALENDesigner.readFile(args[0], null)[0];
        DataSet negatives = TALENDesigner.readFile(args[1], positives)[0];
        DataSet background = TALENDesigner.readFile(args[2], positives)[0];
        TALENTargetFinder negFinder = negatives == null ? null : new TALENTargetFinder(negatives, talFunction, length);
        TALENTargetFinder backFinder = new TALENTargetFinder(background, talFunction, length);

        List<FitnessFunction<DiscreteSequenceIndividual>> fitnesses = new ArrayList<FitnessFunction<DiscreteSequenceIndividual>>();
        fitnesses.add(new GlobalTALENFitnessFunction(talFunction, positives, backFinder, GlobalTALENFitnessFunction.Objective.MAXMARGIN, minDist, maxDist, thresholdByBestScore));
        fitnesses.add(new GlobalTALENFitnessFunction(talFunction, positives, backFinder, GlobalTALENFitnessFunction.Objective.CL, minDist, maxDist, thresholdByBestScore));
        fitnesses.add(new TALENWrapperFitnessFunction(new RVDFitnessFunction(strong, 3), homo, true));
        fitnesses.add(new TALENWrapperFitnessFunction(new RVDDistanceFitnessFunction(strong, 5), homo, true));
        fitnesses.add(new TALENMatchFitnessFunction(positives, talFunction, minDist, maxDist));
        fitnesses.add(new TALENOffTargetFitnessFunction(talFunction, positives, minDist, maxDist, backFinder, thresholdByBestScore));
        if (negatives != null) {
            // The same global and off-target criteria, evaluated against the negatives.
            fitnesses.add(new GlobalTALENFitnessFunction(talFunction, positives, negFinder, GlobalTALENFitnessFunction.Objective.MAXMARGIN, minDist, maxDist, thresholdByBestScore));
            fitnesses.add(new GlobalTALENFitnessFunction(talFunction, positives, negFinder, GlobalTALENFitnessFunction.Objective.CL, minDist, maxDist, thresholdByBestScore));
            fitnesses.add(new TALENOffTargetFitnessFunction(talFunction, positives, minDist, maxDist, negFinder, thresholdByBestScore));
        }

        // Every component gets the same factor.
        double[] facs = new double[fitnesses.size()];
        Arrays.fill(facs, 0.05);
        // Typed with the individual type so it can be handed to getSimplePopulation and the GA without casts.
        FitnessFunction<DiscreteSequenceIndividual> combined = new CombinedFitnessFunction(fitnesses.toArray(new FitnessFunction[0]), facs, true);
        combined = new MultiThreadedFitnessFunction(combined, numThreads);

        System.out.println("Simple Population");
        Time time = new RealTime();
        time.reset();
        Population<DiscreteSequenceIndividual> simplePop = TALENDesigner.getSimplePopulation(initPopSize, positives, length, minDist, maxDist, homo, talFunction, combined, allowed);
        System.out.println(simplePop);
        TALENPopulationInitializer initializer = new TALENPopulationInitializer(homo ? length : 2 * length, homo, talFunction.getRVDAlphabet(), allowed);
        FixedPopulationInitializer<DiscreteSequenceIndividual> simpleInit = new FixedPopulationInitializer<DiscreteSequenceIndividual>(simplePop);

        // Mutation weights: a symbol never mutates to itself; an allowed symbol of the same
        // strength class (strong/not strong) gets weight 2, any other allowed symbol weight 1.
        // Symbols outside "allowed" keep weight 0. Each row is sum-normalised afterwards.
        int alphabetSize = (int)rvdAlph.length();
        double[][] condProbs = new double[alphabetSize][alphabetSize];
        boolean[] isStrong = new boolean[alphabetSize];
        for (int code : strong) {
            if (code >= 0 && code < alphabetSize) {
                isStrong[code] = true;
            }
        }
        for (int from = 0; from < alphabetSize; from++) {
            for (int to : allowed) {
                if (from == to) {
                    condProbs[from][to] = 0.0;
                } else {
                    condProbs[from][to] = isStrong[from] == isStrong[to] ? 2.0 : 1.0;
                }
            }
            Normalisation.sumNormalisation(condProbs[from]);
        }

        SymbolDependentMutation mutation = new SymbolDependentMutation(true, 0.05, condProbs);
        SimpleCrossover crossover = new SimpleCrossover(0.8);
        SimpleShift shift = new SimpleShift(true, 0.5);
        RandomByFitnessSelection selection = new RandomByFitnessSelection(true);

        // Independent runs; only the first one starts from the seed population.
        Population[] pops = new Population[repetitions];
        int keepPerRun = (int)Math.ceil((double)finalPopSize / (double)repetitions);
        for (int r = 0; r < repetitions; r++) {
            System.out.println("repetition " + r);
            GeneticAlgorithm2<DiscreteSequenceIndividual> ga = new GeneticAlgorithm2<DiscreteSequenceIndividual>(r == 0 ? simpleInit : initializer, new Operation[]{mutation, crossover, shift}, selection, combined);
            ga.optimize(1000, initPopSize);
            pops[r] = ga.getFinalPopulation().removeDuplicates().getBestIndividuals(keepPerRun);
            backFinder.reset();
            if (negFinder != null) {
                negFinder.reset();
            }
            // NOTE(review): combined holds a MultiThreadedFitnessFunction here; this cast only
            // succeeds if that class is a CachingFitnessFunction - confirm against the library.
            ((CachingFitnessFunction)combined).reset();
        }

        // Final run seeded with the best individuals of all repetitions.
        System.out.println("final");
        Population init = new DiscreteSequencePopulation(new DiscreteSequenceIndividual[0]).join(pops);
        GeneticAlgorithm2<DiscreteSequenceIndividual> finalGa = new GeneticAlgorithm2<DiscreteSequenceIndividual>(new FixedPopulationInitializer(init), new Operation[]{mutation, crossover, shift}, selection, combined);
        finalGa.optimize(100, finalPopSize);
        Population<DiscreteSequenceIndividual> pop = finalGa.getFinalPopulation();
        pop = pop.removeDuplicates();
        pop = pop.join(init, simplePop);
        pop = pop.removeDuplicates();
        pop.sortIndividuals();

        ((MultiThreadedFitnessFunction)combined).stopWorkers();
        combined = new CachingFitnessFunction<DiscreteSequenceIndividual>(combined);
        combined.setOutputStream(SafeOutputStream.getSafeOutputStream(System.out));
        System.out.println("************************************************");
        System.out.println("Result:");
        System.out.println("++++++++++++++++++++++++++++++++++++++++++++++++");
        for (int idx = 0; idx < pop.getNumberOfIndivuals(); idx++) {
            DiscreteSequenceIndividual individual = pop.getIndividual(idx);
            System.out.println(individual);
            System.out.println(combined.getFitness(individual));
        }
    }

    /**
     * Builds a seed population by asking the model for the best RVD sequence of every window
     * of the positive sequences and keeping the {@code topN} best individuals.
     *
     * <p>With {@code homo == true} one individual is created per window start, first on the
     * sequence as given and then on its reverse complement. Otherwise each window is paired
     * with every reverse-complemented second window that starts between {@code minDist} and
     * {@code maxDist} positions after the first window's {@code length} offset, and the two RVD
     * sequences are concatenated into one individual. Progress indices are printed to
     * {@code System.out}.
     *
     * @param topN number of individuals requested from {@code getBestIndividuals}
     * @param positives sequences the windows are taken from
     * @param length window parameter; sub-sequences of {@code length + 1} symbols are extracted
     * @param minDist minimum distance between the two windows of a pair
     * @param maxDist maximum distance between the two windows of a pair
     * @param homo whether individuals consist of a single RVD sequence
     * @param model model queried via {@code getBestRVDsFor}
     * @param fitness fitness function assigned to every created individual
     * @param allowed RVD codes passed to {@code getBestRVDsFor}
     * @return the best {@code topN} individuals of all candidates
     * @throws Exception if sub-sequence extraction, sequence creation or fitness evaluation fails
     */
    public static Population<DiscreteSequenceIndividual> getSimplePopulation(int topN, DataSet positives, int length, int minDist, int maxDist, boolean homo, TALgetterDiffSM model, FitnessFunction<DiscreteSequenceIndividual> fitness, int[] allowed) throws Exception {
        List<TALENIndividual> candidates = new ArrayList<TALENIndividual>();
        for (int i = 0; i < positives.getNumberOfElements(); i++) {
            System.out.println("i: " + i);
            Sequence seq = positives.getElementAt(i);
            // At most two passes: the given strand and, for homo designs only, its reverse complement.
            for (int pass = 0; pass < 2; pass++) {
                for (int j = 0; j < seq.getLength() - 2 * length - minDist; j++) {
                    System.out.println("j: " + j);
                    Sequence tal1 = model.getBestRVDsFor(seq.getSubSequence(j, length + 1), allowed);
                    if (homo) {
                        candidates.add(newIndividual(tal1, homo, fitness));
                        continue;
                    }
                    int firstK = j + length + minDist;
                    int lastK = j + length + maxDist;
                    for (int k = firstK; k < seq.getLength() - length && k <= lastK; k++) {
                        System.out.println("k: " + k);
                        Sequence second = seq.getSubSequence(k, length + 1).reverseComplement();
                        Sequence tal2 = model.getBestRVDsFor(second, allowed);
                        // Concatenate both RVD strings (symbols joined by "-") and parse them back as one sequence.
                        String joined = tal1.toString("-", 0, tal1.getLength()) + tal2.toString("-", 0, tal2.getLength());
                        Sequence tal = Sequence.create(tal1.getAlphabetContainer(), null, joined, "-");
                        candidates.add(newIndividual(tal, homo, fitness));
                    }
                }
                if (!homo) {
                    break;
                }
                seq = seq.reverseComplement();
            }
        }
        DiscreteSequencePopulation pop = new DiscreteSequencePopulation(candidates.toArray(new DiscreteSequenceIndividual[0]));
        return pop.getBestIndividuals(topN);
    }

    /** Wraps an RVD sequence into an individual and attaches the fitness function to it. */
    private static TALENIndividual newIndividual(Sequence rvds, boolean homo, FitnessFunction<DiscreteSequenceIndividual> fitness) throws Exception {
        TALENIndividual ind = new TALENIndividual((SimpleDiscreteSequence)rvds, homo);
        ind.setFitness((FitnessFunction)fitness);
        return ind;
    }

    /**
     * Cuts every occurrence of the given positive strings out of {@code str}.
     *
     * <p>The string is split at the first occurrence of the first matching entry of
     * {@code positives}; the parts before and after the match are processed again in
     * first-in-first-out order until no part contains any entry. Each removed occurrence is
     * appended to {@code outs}, each remaining part to {@code seqs}, both as
     * {@link SparseSequence}s over {@code con}. {@code seqs} is only modified after the whole
     * string has been processed.
     *
     * <p>Empty entries of {@code positives} are ignored: they would match at index 0 of every
     * fragment and the splitting would never terminate.
     *
     * @param con alphabet container used for the created sequences
     * @param str the string to filter
     * @param positives strings to cut out; may be empty
     * @param seqs receives the remaining fragments
     * @param outs receives one sequence per removed occurrence
     * @throws Exception if a sequence cannot be created from a fragment
     */
    public static void addFilteredfilter(AlphabetContainer con, String str, String[] positives, List<Sequence> seqs, List<Sequence> outs) throws Exception {
        LinkedList<String> pending = new LinkedList<String>();
        pending.add(str);
        List<String> remaining = new LinkedList<String>();
        while (!pending.isEmpty()) {
            String curr = pending.removeFirst();
            boolean split = false;
            for (String positive : positives) {
                if (positive.length() == 0) {
                    continue;
                }
                int idx = curr.indexOf(positive);
                if (idx >= 0) {
                    // NOTE(review): a match at the very start or end yields an empty fragment,
                    // which later becomes an empty sequence in seqs - confirm this is intended.
                    pending.add(curr.substring(0, idx));
                    pending.add(curr.substring(idx + positive.length()));
                    outs.add(new SparseSequence(con, positive));
                    split = true;
                    break;
                }
            }
            if (!split) {
                remaining.add(curr);
            }
        }
        for (String fragment : remaining) {
            seqs.add(new SparseSequence(con, fragment));
        }
    }

    /**
     * Reads nucleotide sequences from a text file and removes all occurrences of the given
     * positive sequences from them.
     *
     * <p>Lines starting with {@code >} end the current sequence. All other lines are converted
     * to upper case and scanned for runs of {@code A}, {@code C}, {@code G}, {@code T};
     * consecutive lines consisting only of such characters are concatenated, while any other
     * character within a line ends the current run. Every completed run is passed through
     * {@link #addFilteredfilter}. The total number of collected characters and the number of
     * excluded occurrences are printed to {@code System.out}.
     *
     * @param path file to read, or the literal string {@code "null"} to read nothing
     * @param positives sequences whose string representation is cut out of the data; may be {@code null}
     * @return a two-element array: index 0 holds the remaining sequences, index 1 the excluded
     *         occurrences or {@code null} if there were none; both elements are {@code null}
     *         when {@code path} is {@code "null"}
     * @throws Exception if the file cannot be read or a sequence cannot be created
     */
    public static DataSet[] readFile(String path, DataSet positives) throws Exception {
        if (path.equals("null")) {
            return new DataSet[2];
        }
        String[] posStr = new String[positives == null ? 0 : positives.getNumberOfElements()];
        for (int i = 0; i < posStr.length; i++) {
            posStr[i] = positives.getElementAt(i).toString();
        }

        DNAAlphabetContainer con = DNAAlphabetContainer.SINGLETON;
        ArrayList<Sequence> seqs = new ArrayList<Sequence>();
        ArrayList<Sequence> out = new ArrayList<Sequence>();
        long total = 0L;
        StringBuilder buffer = new StringBuilder();

        BufferedReader read = new BufferedReader(new FileReader(path));
        try {
            String str;
            while ((str = read.readLine()) != null) {
                if (str.startsWith(">")) {
                    // A header line terminates whatever has been collected so far.
                    total += TALENDesigner.flush(con, buffer, posStr, seqs, out);
                    continue;
                }
                str = str.toUpperCase();
                // NOTE(review): a non-empty line without any A/C/G/T produces no match and thus
                // does not terminate the current run, unlike other characters inside a
                // line - confirm this is intended.
                Matcher match = ACGT.matcher(str);
                while (match.find()) {
                    int start = match.start();
                    int end = match.end();
                    if (start > 0) {
                        // Something other than ACGT precedes this run: close the previous one.
                        total += TALENDesigner.flush(con, buffer, posStr, seqs, out);
                    }
                    buffer.append(str, start, end);
                    if (end < str.length()) {
                        // Something other than ACGT follows this run: close it right away.
                        total += TALENDesigner.flush(con, buffer, posStr, seqs, out);
                    }
                }
            }
        } finally {
            // Release the file handle even if reading or filtering fails.
            read.close();
        }
        total += TALENDesigner.flush(con, buffer, posStr, seqs, out);

        System.out.println("total length: " + total);
        DataSet ds = new DataSet("", seqs);
        DataSet ds2 = null;
        System.out.println("excluded: " + out.size());
        if (out.size() > 0) {
            ds2 = new DataSet("", out);
        }
        seqs.clear();
        out.clear();
        return new DataSet[]{ds, ds2};
    }

    /**
     * Filters the buffered characters into {@code seqs}/{@code out} and empties the buffer.
     *
     * @return the number of characters that were flushed; 0 if the buffer was empty
     */
    private static long flush(AlphabetContainer con, StringBuilder buffer, String[] posStr, List<Sequence> seqs, List<Sequence> out) throws Exception {
        int flushed = buffer.length();
        if (flushed == 0) {
            return 0L;
        }
        TALENDesigner.addFilteredfilter(con, buffer.toString(), posStr, seqs, out);
        buffer.setLength(0);
        return flushed;
    }
}

