/*
 * Decompiled with CFR 0.152.
 */
package genomeviewer.algorithm;

import java.util.ArrayList;
import java.util.regex.Pattern;

/**
 * Splits a free-text description into words, discarding a fixed list of
 * uninformative filter words and gluing very short tokens onto the word
 * that follows them.
 */
public class SplitToWords {
    /**
     * One or more consecutive whitespace characters or separator characters
     * ({@code | ( ) , [ ] ; :}); every such run is treated as a single word boundary.
     */
    private static final Pattern SEPARATORS = Pattern.compile("[\\s|(),\\[\\];:]+");

    /** Tokens of at most this many characters are never emitted on their own. */
    private static final int MAX_SHORT_LENGTH = 2;

    /** Words that are dropped from the result; compared case-insensitively. */
    private static final String[] KEYWORD_FILTER = new String[]{"protein", "proteins", "enzyme", "enzymes", "similar", "similarity", "weakly", "strongly", "strong", "weak", "system", "putative", "hypothetical", "function", "type", "conserved", "interpro", "ipr", "pfam", "smart", "cog", "region", "and", "large", "small", "family", "superfamily", "minor", "major", "high", "highly", "low", "lowly", "very", "related", "predicted", "like", "likely", "potential", "possible", "hit", "hits", "subunit", "subunits", "part", "parts", "involved", "previous", "associated", "only", "linked", "required", "feature", "features", "membership", "domain", "domains", "specificity", "specific", "for", "from", "cds", "to", "an", "in", "for", "on", "of", "but", "with", "unclear", "no", "not", "-", "+", "unknown", "the", "confidence", "confident", "uncharacterized", "sequence", "pubmed", ".", "*", "/", "present", "one", "during", "some", "any", "is", "are", "found", "percent", "identity", "ORF", "classified", "suggest", "codon", "start", "end", "tail", "identical", "pir", "strain"};

    /** The description to split; may be {@code null}. */
    private final String desc;

    /**
     * Creates a splitter for the given description.
     *
     * @param desc the text to split; {@code null} is accepted and yields no words
     */
    public SplitToWords(String desc) {
        this.desc = desc;
    }

    /**
     * Returns the words of the description in their original order.
     *
     * <p>The text is split on whitespace and on the characters
     * {@code | ( ) , [ ] ; :}. Each token is then handled as follows:
     * <ul>
     *   <li>a token equal (ignoring case) to a filter word is dropped, and any
     *       pending short token is discarded with it;</li>
     *   <li>a token of at most two characters is not emitted by itself but is
     *       remembered as a prefix for the next token;</li>
     *   <li>any other token is emitted, prefixed with the remembered short
     *       token and a single space if there is one.</li>
     * </ul>
     * A short token at the very end of the text is therefore not returned.
     *
     * @return a new list of words; empty if the description is {@code null}
     *         or contains no usable word
     */
    public ArrayList<String> getWords() {
        ArrayList<String> words = new ArrayList<String>();
        if (this.desc == null) {
            return words;
        }

        // Collapse each separator/whitespace run to one space without touching the field,
        // so this method has no side effects on the instance.
        String normalized = SEPARATORS.matcher(this.desc).replaceAll(" ");

        // The most recent short token that is still waiting for a following word.
        String pendingShort = "";
        for (String token : normalized.split(" ")) {
            if (isFiltered(token)) {
                // A filter word breaks the link between a short token and the next word.
                pendingShort = "";
                continue;
            }
            if (token.length() <= MAX_SHORT_LENGTH) {
                // Also covers the empty token produced by a leading separator.
                pendingShort = token;
                continue;
            }
            words.add(pendingShort.isEmpty() ? token : pendingShort + " " + token);
            pendingShort = "";
        }
        return words;
    }

    /** Tells whether {@code word} matches one of the filter words, ignoring case. */
    private static boolean isFiltered(String word) {
        for (String keyWord : KEYWORD_FILTER) {
            if (word.equalsIgnoreCase(keyWord)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Small demo: splits a sample description and prints each resulting word
     * on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SplitToWords stw = new SplitToWords("similar to home weakly strongly protein | what s(important)hallo");
        for (String word : stw.getWords()) {
            System.out.println(word);
        }
    }
}

