package abner;

import edu.umass.cs.mallet.base.fst.CRF4;
import edu.umass.cs.mallet.base.fst.MultiSegmentationEvaluator;
import edu.umass.cs.mallet.base.pipe.Pipe;
import edu.umass.cs.mallet.base.pipe.SerialPipes;
import edu.umass.cs.mallet.base.pipe.TokenSequence2FeatureVectorSequence;
import edu.umass.cs.mallet.base.pipe.iterator.LineGroupIterator;
import edu.umass.cs.mallet.base.pipe.tsf.OffsetConjunctions;
import edu.umass.cs.mallet.base.pipe.tsf.RegexMatches;
import edu.umass.cs.mallet.base.pipe.tsf.TokenTextCharPrefix;
import edu.umass.cs.mallet.base.pipe.tsf.TokenTextCharSuffix;
import edu.umass.cs.mallet.base.types.InstanceList;
import java.io.File;
import java.io.FileReader;
import java.util.regex.Pattern;

/* loaded from: input_file:abner/Trainer.class */
/**
 * Trains a MALLET {@code CRF4} sequence model from a training file and writes
 * the trained model to disk.
 *
 * <p>Each token is first turned into a feature vector by a fixed pipeline of
 * regular-expression, prefix/suffix and neighbour-conjunction features (see
 * {@link #buildFeaturePipe()}).
 */
public class Trainer {
    int numEvaluations = 0;
    static int iterationsBetweenEvals = 16;

    // NOTE(review): the character classes below contain U+FFFD replacement
    // characters, so the original (presumably accented) letters appear to have
    // been lost when this file was decoded. None of these five constants is
    // referenced inside this class; they are kept unchanged.
    private static final String CAPS = "[A-Z��������������]";
    private static final String LOW = "[a-z��������������]";
    private static final String CAPSNUM = "[A-Z��������������0-9]";
    private static final String ALPHA = "[A-Z��������������a-z��������������]";
    private static final String ALPHANUM = "[A-Z��������������a-z��������������0-9]";
    private static final String PUNCTUATION = "[,\\.;:?!()]";
    private static final String QUOTE = "[\"`']";
    /** Alternation of spelled-out Greek letter names, used by the GREEK/HASGREEK features. */
    private static final String GREEK = "(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)";

    /**
     * Trains a model without a segmentation evaluator.
     *
     * @param str  path of the training data file
     * @param str2 path the trained model is written to
     */
    public void train(String str, String str2) {
        train(str, str2, null);
    }

    /**
     * Trains a CRF on the given file and writes the resulting model.
     *
     * <p>Any exception raised while reading, training or writing is caught and
     * printed to {@code System.err}; nothing is propagated to the caller.
     *
     * @param str    path of the training data file
     * @param str2   path the trained model is written to
     * @param strArr tag names; when non-null, a {@code MultiSegmentationEvaluator}
     *               is created over the labels {@code "B-" + tag} and
     *               {@code "I-" + tag} and passed to training. May be
     *               {@code null}, in which case no evaluator is used.
     */
    public void train(String str, String str2, String[] strArr) {
        try {
            SerialPipes featurePipe = buildFeaturePipe();
            CRF4 crf = new CRF4(featurePipe, (Pipe) null);

            System.out.println("Reading '" + str + "' file...");
            InstanceList trainingData = new InstanceList(featurePipe);
            FileReader reader = new FileReader(new File(str));
            try {
                trainingData.add(new LineGroupIterator(reader, Pattern.compile("^.*$"), false));
            } finally {
                // The original never closed the reader; release the file handle
                // once the instances have been loaded (or loading has failed).
                reader.close();
            }

            System.out.println("Doing the deed...");
            System.out.println("Number of features = " + featurePipe.getDataAlphabet().size());
            System.out.println("Training on " + trainingData.size() + " training instances...");
            crf.addStatesForLabelsConnectedAsIn(trainingData);

            // The evaluator is optional: it is only built when tag names were supplied.
            MultiSegmentationEvaluator evaluator = null;
            if (strArr != null) {
                String[] beginLabels = new String[strArr.length];
                String[] insideLabels = new String[strArr.length];
                for (int i = 0; i < strArr.length; i++) {
                    beginLabels[i] = "B-" + strArr[i];
                    insideLabels[i] = "I-" + strArr[i];
                }
                evaluator = new MultiSegmentationEvaluator(beginLabels, insideLabels, false);
            }
            crf.train(trainingData, (InstanceList) null, (InstanceList) null, evaluator, 99999, 10, new double[]{0.2d, 0.5d, 0.8d});

            crf.write(new File(str2));
        } catch (Exception e) {
            System.err.println(e);
        }
    }

    /**
     * Builds the token-to-feature-vector pipeline used for training.
     *
     * <p>The order of the pipes is kept exactly as in the original code.
     */
    private static SerialPipes buildFeaturePipe() {
        Pipe[] pipes = new Pipe[]{
            new Input2TokenSequence(),
            // Capitalisation features.
            new RegexMatches("INITCAPS", Pattern.compile("[A-Z].*")),
            new RegexMatches("INITCAPSALPHA", Pattern.compile("[A-Z][a-z].*")),
            new RegexMatches("ALLCAPS", Pattern.compile("[A-Z]+")),
            new RegexMatches("CAPSMIX", Pattern.compile("[A-Za-z]+")),
            // Digit and number features.
            new RegexMatches("HASDIGIT", Pattern.compile(".*[0-9].*")),
            new RegexMatches("SINGLEDIGIT", Pattern.compile("[0-9]")),
            new RegexMatches("DOUBLEDIGIT", Pattern.compile("[0-9][0-9]")),
            new RegexMatches("NATURALNUMBER", Pattern.compile("[0-9]+")),
            new RegexMatches("REALNUMBER", Pattern.compile("[-0-9]+[.,]+[0-9.,]+")),
            // Dash features.
            new RegexMatches("HASDASH", Pattern.compile(".*-.*")),
            new RegexMatches("INITDASH", Pattern.compile("-.*")),
            new RegexMatches("ENDDASH", Pattern.compile(".*-")),
            // Prefix and suffix features of length 3 and 4.
            new TokenTextCharPrefix("PREFIX=", 3),
            new TokenTextCharPrefix("PREFIX=", 4),
            new TokenTextCharSuffix("SUFFIX=", 3),
            new TokenTextCharSuffix("SUFFIX=", 4),
            // Offsets -1 and +1. The decompiler emitted this as an int[] holding
            // int[] elements, which does not compile; the argument is an int[][].
            new OffsetConjunctions(new int[][]{{-1}, {1}}),
            // The remaining features are added after the conjunction step.
            new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[A-Za-z].*[0-9].*")),
            new RegexMatches("ALPHANUMERIC", Pattern.compile(".*[0-9].*[A-Za-z].*")),
            new RegexMatches("ROMAN", Pattern.compile("[IVXDLCM]+")),
            new RegexMatches("HASROMAN", Pattern.compile(".*\\b[IVXDLCM]+\\b.*")),
            new RegexMatches("GREEK", Pattern.compile(GREEK)),
            new RegexMatches("HASGREEK", Pattern.compile(".*\\b" + GREEK + "\\b.*")),
            // NOTE(review): inside a character class "!-+" is the range
            // U+0021..U+002B, not the three literals '!', '-' and '+'. Left
            // unchanged because altering it would change the feature set.
            new RegexMatches("PUNCTUATION", Pattern.compile("[,.;:?!-+]")),
            new TokenSequence2FeatureVectorSequence(true, true)
        };
        return new SerialPipes(pipes);
    }
}
