/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.ie.machinereading;

import edu.stanford.nlp.ie.machinereading.MachineReadingProperties;
import edu.stanford.nlp.ie.machinereading.common.NoPunctuationHeadFinder;
import edu.stanford.nlp.ie.machinereading.structure.EntityMention;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.parser.common.ParserAnnotations;
import edu.stanford.nlp.parser.common.ParserConstraint;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;

public class GenericDataSetReader {
    /** Logger for this reader; created and set to {@code Level.SEVERE} by the four-argument constructor. */
    protected Logger logger;
    /** Head finder handed to {@code Tree.headTerminal} when locating the head of a subtree. */
    protected final HeadFinder headFinder = new NoPunctuationHeadFinder();
    /** Pipeline used by {@code preProcessSentences} to annotate datasets that lack parse trees; may be null. */
    protected StanfordCoreNLP processor;
    /** Parser annotator; starts as null and is looked up on first call to {@code getParser()}. */
    protected Annotator parserProcessor;
    /** When true, {@code parse(String)} runs {@code preProcessSentences} on the dataset it read. */
    protected final boolean preProcessSentences;
    /** Passed as {@code setHeadSpan} to {@code assignSyntacticHead} during pre-processing. */
    protected final boolean calculateHeadSpan;
    /** When true, pre-processing re-indexes every tree even if its root label already carries index annotations. */
    protected final boolean forceGenerationOfIndexSpans;
    /** Chooses between the newer strategy in {@code findSyntacticHead} (true) and {@code originalFindSyntacticHead} (false). */
    protected boolean useNewHeadFinder = true;

    /**
     * Creates a reader without an NLP processor and with sentence pre-processing,
     * head-span calculation and forced index-span generation all switched off.
     */
    public GenericDataSetReader() {
        this(null, false, false, false);
    }

    /**
     * Creates a reader with the given configuration.
     *
     * <p>The logger is created for this class and set to {@code Level.SEVERE}; the parser
     * annotator is left unset so that it is resolved on first use by {@code getParser()}.
     *
     * @param processor pipeline used to annotate datasets during pre-processing; ignored when null
     * @param preProcessSentences whether {@code parse(String)} should pre-process what it reads
     * @param calculateHeadSpan whether head spans are set while assigning syntactic heads
     * @param forceGenerationOfIndexSpans whether trees are always re-indexed during pre-processing
     */
    public GenericDataSetReader(StanfordCoreNLP processor, boolean preProcessSentences, boolean calculateHeadSpan, boolean forceGenerationOfIndexSpans) {
        Logger readerLogger = Logger.getLogger(GenericDataSetReader.class.getName());
        readerLogger.setLevel(Level.SEVERE);
        this.logger = readerLogger;

        // Only install a processor when one was actually supplied.
        if (null != processor) {
            this.setProcessor(processor);
        }

        this.parserProcessor = null;
        this.preProcessSentences = preProcessSentences;
        this.calculateHeadSpan = calculateHeadSpan;
        this.forceGenerationOfIndexSpans = forceGenerationOfIndexSpans;
    }

    /**
     * Sets the pipeline used by {@code preProcessSentences} to annotate datasets.
     *
     * @param p the pipeline to use; stored as-is
     */
    public void setProcessor(StanfordCoreNLP p) {
        this.processor = p;
    }

    /**
     * Selects which head-finding strategy {@code findSyntacticHead} uses.
     *
     * @param useNewHeadFinder true for the newer strategy, false to delegate to
     *        {@code originalFindSyntacticHead}
     */
    public void setUseNewHeadFinder(boolean useNewHeadFinder) {
        this.useNewHeadFinder = useNewHeadFinder;
    }

    /**
     * Returns the parser annotator, resolving it on first use.
     *
     * <p>The annotator is obtained from {@code StanfordCoreNLP.getExistingAnnotator("parse")}
     * and cached in {@code parserProcessor} for subsequent calls.
     *
     * @return the cached parser annotator, never null
     * @throws IllegalStateException if no existing "parse" annotator is available
     */
    public Annotator getParser() {
        if (this.parserProcessor == null) {
            Annotator existing = StanfordCoreNLP.getExistingAnnotator("parse");
            // An assert is a no-op unless the JVM runs with -ea, which would let a null
            // annotator escape and blow up later as an unrelated NullPointerException.
            if (existing == null) {
                throw new IllegalStateException("No existing \"parse\" annotator is available; a pipeline containing the parser must be created first");
            }
            this.parserProcessor = existing;
        }
        return this.parserProcessor;
    }

    /**
     * Changes the level of this reader's logger.
     *
     * @param level the new logging level
     */
    public void setLoggerLevel(Level level) {
        this.logger.setLevel(level);
    }

    /**
     * Returns the level currently set on this reader's logger.
     *
     * @return the logger's level as reported by {@code Logger.getLevel()}
     */
    public Level getLoggerLevel() {
        return this.logger.getLevel();
    }

    /**
     * Reads the dataset at {@code path} and, when pre-processing is enabled, pre-processes it.
     *
     * <p>Any exception thrown by {@code read} is wrapped in an {@link IOException} that names
     * the offending path and keeps the original exception as its cause. When pre-processing is
     * enabled and {@code MachineReadingProperties.trainUsePipelineNER} is set, entity types are
     * additionally replaced using the CoreNLP NER annotator.
     *
     * @param path location of the dataset, passed unchanged to {@code read}
     * @return the annotation produced by {@code read}, or null if {@code read} returned null
     * @throws IOException if {@code read} fails for any reason
     */
    public final Annotation parse(String path) throws IOException {
        Annotation retVal;
        try {
            retVal = this.read(path);
        }
        catch (Exception ex) {
            // Every failure still surfaces as an IOException wrapping the cause, but now
            // with a message that says which input could not be read.
            throw new IOException("Failed to read dataset from: " + path, ex);
        }
        if (retVal == null) {
            // read() can return null (the implementation in this class does); there is
            // nothing to pre-process, so do not dereference it.
            return null;
        }
        if (this.preProcessSentences) {
            this.preProcessSentences(retVal);
            if (MachineReadingProperties.trainUsePipelineNER) {
                this.logger.severe("Changing NER tags using the CoreNLP pipeline.");
                this.modifyUsingCoreNLPNER(retVal);
            }
        }
        return retVal;
    }

    /**
     * Replaces the type of every entity mention with the most frequent NER tag that a
     * freshly built "pos, lemma, ner" pipeline assigns to the tokens of its extent.
     *
     * <p>Sentences without entity mentions are skipped. A mention keeps its current type
     * when no tag can be determined for it (for example when its extent covers no tokens).
     *
     * @param doc the document to annotate and whose entity mentions are updated in place
     */
    private void modifyUsingCoreNLPNER(Annotation doc) {
        Properties ann = new Properties();
        ann.setProperty("annotators", "pos, lemma, ner");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(ann, false);
        pipeline.annotate(doc);
        // Typed lookups: the annotation keys already carry the element types, so no raw
        // lists or casts are needed.
        List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
            if (entities == null) {
                continue;
            }
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            for (EntityMention en : entities) {
                Span s = en.getExtent();
                ClassicCounter<String> allNertagforSpan = new ClassicCounter<String>();
                for (int i = s.start(); i < s.end(); ++i) {
                    allNertagforSpan.incrementCount(tokens.get(i).ner());
                }
                String entityNertag = Counters.argmax(allNertagforSpan);
                // Do not wipe an existing type when the majority vote produced nothing.
                if (entityNertag != null) {
                    en.setType(entityNertag);
                }
            }
        }
    }

    /**
     * Reads the dataset at {@code path}. This implementation reads nothing and always
     * returns null.
     * NOTE(review): presumably concrete readers override this method; confirm against subclasses.
     *
     * @param path location of the dataset (unused here)
     * @return always null in this implementation
     * @throws Exception declared so that overriding readers may fail with any exception;
     *         {@code parse(String)} wraps whatever is thrown in an IOException
     */
    public Annotation read(String path) throws Exception {
        return null;
    }

    /**
     * Joins the words of the given tokens with single spaces.
     *
     * @param tokens the tokens to render; may be null
     * @return the space-separated words, or the empty string when {@code tokens} is null or empty
     */
    private static String sentenceToString(List<CoreLabel> tokens) {
        StringBuilder joined = new StringBuilder();
        if (tokens == null) {
            return joined.toString();
        }
        // The separator is empty before the first word and a single space afterwards.
        String separator = "";
        for (CoreLabel current : tokens) {
            joined.append(separator).append(current.word());
            separator = " ";
        }
        return joined.toString();
    }

    /**
     * Determines the syntactic head token of an entity mention and stores it on the mention.
     *
     * <p>If the mention already has a head position (anything other than -1) it is returned
     * unchanged. Otherwise {@code findSyntacticHead} is consulted; if that fails, throws, or
     * yields a node without a begin index, the last token of the mention's extent is used.
     *
     * @param ent the mention to update
     * @param tree the parse tree of the sentence containing the mention
     * @param tokens the tokens of that sentence
     * @param setHeadSpan whether to also set the head span to the single head token
     * @return the token position of the head
     */
    public int assignSyntacticHead(EntityMention ent, Tree tree, List<CoreLabel> tokens, boolean setHeadSpan) {
        int existingHead = ent.getSyntacticHeadTokenPosition();
        if (existingHead != -1) {
            return existingHead;
        }
        // Rendering a whole tree is costly; only do it when the message will be published.
        if (this.logger.isLoggable(Level.FINEST)) {
            this.logger.finest("Finding syntactic head for entity: " + ent + " in tree: " + tree.toString());
            this.logger.finest("Flat sentence is: " + tokens);
        }
        Tree sh = null;
        try {
            sh = this.findSyntacticHead(ent, tree, tokens);
        }
        catch (Exception e) {
            this.logHeadFinderFailure(tokens, e);
        }
        catch (AssertionError e) {
            // Head finding relies on assert statements, so assertion failures are treated
            // exactly like any other parsing failure.
            this.logHeadFinderFailure(tokens, e);
        }
        // Right-most token of the extent is the fallback head.
        int headPos = ent.getExtentTokenEnd() - 1;
        Integer foundBegin = null;
        if (sh != null) {
            CoreLabel label = (CoreLabel)sh.label();
            foundBegin = label.get(CoreAnnotations.BeginIndexAnnotation.class);
        }
        if (foundBegin != null) {
            headPos = foundBegin;
        } else if (this.logger.isLoggable(Level.FINE)) {
            this.logger.fine("WARNING: failed to find syntactic head for entity: " + ent + " in tree: " + tree);
            this.logger.fine("Fallback strategy: will set head to last token in mention: " + tokens.get(headPos));
        }
        ent.setHeadTokenPosition(headPos);
        if (setHeadSpan) {
            ent.setHeadTokenSpan(new Span(headPos, headPos + 1));
        }
        return headPos;
    }

    /** Reports a head-finding failure, including its stack trace, through the reader's logger. */
    private void logHeadFinderFailure(List<CoreLabel> tokens, Throwable cause) {
        this.logger.log(Level.SEVERE, "WARNING: failed to parse sentence. Will continue with the right-most head heuristic: " + GenericDataSetReader.sentenceToString(tokens), cause);
    }

    /**
     * Prepares every sentence of the dataset for machine reading.
     *
     * <p>If a processor is set and the first sentence has no parse tree, the whole dataset is
     * annotated with it. Each sentence's tree then has its labels converted to
     * {@code CoreLabel}s, index spans are generated when forced or when the root label has
     * neither a begin nor an end index, and a syntactic head is assigned to every entity
     * mention of the sentence.
     *
     * @param dataset the dataset to pre-process in place
     * @throws RuntimeException if a sentence has no parse tree
     */
    public void preProcessSentences(Annotation dataset) {
        this.logger.severe("GenericDataSetReader: Started pre-processing the corpus...");
        if (this.processor != null) {
            List<CoreMap> unprocessed = dataset.get(CoreAnnotations.SentencesAnnotation.class);
            if (unprocessed.size() > 0 && !unprocessed.get(0).containsKey(TreeCoreAnnotations.TreeAnnotation.class)) {
                this.logger.info("Annotating dataset with " + this.processor);
                this.processor.annotate(dataset);
            } else {
                this.logger.info("Found existing syntactic annotations. Will not use the NLP processor.");
            }
        }
        // Re-read the sentences: annotating the dataset may have replaced the list.
        List<CoreMap> sentences = dataset.get(CoreAnnotations.SentencesAnnotation.class);
        this.logger.fine("Extracted " + sentences.size() + " sentences.");
        // Tree and token dumps are expensive to build, so they are only produced when
        // FINE messages are actually published.
        final boolean fineEnabled = this.logger.isLoggable(Level.FINE);
        for (CoreMap sentence : sentences) {
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            if (fineEnabled) {
                this.logger.fine("Processing sentence " + tokens);
            }
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            if (tree == null) {
                throw new RuntimeException("ERROR: MR requires full syntactic analysis!");
            }
            GenericDataSetReader.convertToCoreLabels(tree);
            CoreLabel l = (CoreLabel)tree.label();
            // NOTE(review): spans are regenerated only when BOTH indices are missing; a root
            // label carrying just one of them is left as-is. Confirm this is intended.
            boolean lacksBothIndices = !l.containsKey(CoreAnnotations.BeginIndexAnnotation.class) && !l.containsKey(CoreAnnotations.EndIndexAnnotation.class);
            if (this.forceGenerationOfIndexSpans || lacksBothIndices) {
                tree.indexSpans(0);
                this.logger.fine("Index spans were generated.");
            } else {
                this.logger.fine("Index spans were NOT generated.");
            }
            if (fineEnabled) {
                this.logger.fine("Parse tree using CoreLabel:\n" + tree.pennString());
            }
            List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
            if (mentions == null) {
                continue;
            }
            for (EntityMention ent : mentions) {
                if (fineEnabled) {
                    this.logger.fine("Finding head for entity: " + ent);
                }
                int headPos = this.assignSyntacticHead(ent, tree, tokens, this.calculateHeadSpan);
                if (fineEnabled) {
                    this.logger.fine("Syntactic head of mention \"" + ent + "\" is: " + tokens.get(headPos).word());
                }
                assert (ent.getExtent() != null);
                assert (ent.getHead() != null);
                assert (ent.getSyntacticHeadTokenPosition() >= 0);
            }
        }
        this.logger.severe("GenericDataSetReader: Pre-processing complete.");
    }

    /**
     * Ensures that every node of the tree is labelled with a {@code CoreLabel}.
     *
     * <p>A node whose label is not already a {@code CoreLabel} receives a new one that
     * carries only the old label's value. The tree is modified in place, node first and
     * then its children in order.
     *
     * @param tree root of the (sub)tree to convert
     */
    public static void convertToCoreLabels(Tree tree) {
        Label current = tree.label();
        boolean alreadyCoreLabel = current instanceof CoreLabel;
        if (!alreadyCoreLabel) {
            CoreLabel replacement = new CoreLabel();
            replacement.setValue(current.value());
            tree.setLabel(replacement);
        }
        Tree[] kids = tree.children();
        for (int i = 0; i < kids.length; ++i) {
            GenericDataSetReader.convertToCoreLabels(kids[i]);
        }
    }

    /**
     * Renders the tree through {@code Tree.toStringBuilder} with its boolean flag set to true.
     *
     * @param tree the tree to render
     * @return the textual form of the tree
     */
    private static String printTree(Tree tree) {
        return tree.toStringBuilder(new StringBuilder(), true).toString();
    }

    /**
     * Finds a head terminal for the given subtree without ever returning null.
     *
     * @param top the subtree whose head is wanted
     * @return the head terminal chosen by the head finder; failing that, the last leaf of
     *         {@code top}; failing that, {@code top} itself
     */
    private Tree safeHead(Tree top) {
        Tree viaHeadFinder = top.headTerminal(this.headFinder);
        if (viaHeadFinder != null) {
            return viaHeadFinder;
        }
        // The head finder gave up: fall back to the right-most leaf, if there is one.
        List<Tree> terminals = top.getLeaves();
        int count = terminals.size();
        return count > 0 ? terminals.get(count - 1) : top;
    }

    /**
     * Locates the tree node that serves as the syntactic head of an entity mention.
     *
     * <p>When the newer strategy is disabled this delegates to
     * {@code originalFindSyntacticHead}. Otherwise, if a constituent of {@code root} spans
     * exactly the mention's extent, its head terminal is returned. If not, the extent
     * (minus "-" tokens) is wrapped as "It was ... ." and parsed on its own with a
     * constraint covering the extent; the head of that local parse is then mapped back onto
     * a leaf of {@code root} with the same value at approximately the same position.
     *
     * @param ent the mention whose head is wanted
     * @param root the parse tree of the full sentence, with index spans
     * @param tokens the tokens of the full sentence
     * @return the head node, or null if the local parse produced no tree or its head
     *         could not be mapped back onto {@code root}
     */
    public Tree findSyntacticHead(EntityMention ent, Tree root, List<CoreLabel> tokens) {
        if (!this.useNewHeadFinder) {
            return this.originalFindSyntacticHead(ent, root, tokens);
        }
        final boolean fineEnabled = this.logger.isLoggable(Level.FINE);
        final int extentStart = ent.getExtentTokenStart();
        final int extentEnd = ent.getExtentTokenEnd();
        this.logger.fine("Searching for tree matching " + ent);
        Tree exactMatch = GenericDataSetReader.findTreeWithSpan(root, extentStart, extentEnd);
        if (exactMatch != null) {
            if (fineEnabled) {
                this.logger.fine("Mention \"" + ent + "\" mapped to tree: " + GenericDataSetReader.printTree(exactMatch));
            }
            return this.safeHead(exactMatch);
        }
        // Number of dropped "-" tokens: the real head may sit up to this many positions
        // to the right of where the local parse places it.
        int approximateness = 0;
        ArrayList<CoreLabel> extentTokens = new ArrayList<CoreLabel>();
        extentTokens.add(GenericDataSetReader.initCoreLabel("It"));
        extentTokens.add(GenericDataSetReader.initCoreLabel("was"));
        // Count of artificial words placed in front of the extent; every offset below is
        // derived from it instead of repeating the literal.
        final int addedWords = extentTokens.size();
        for (int i = extentStart; i < extentEnd; ++i) {
            CoreLabel label = tokens.get(i);
            if ("-".equals(label.word())) {
                ++approximateness;
            } else {
                extentTokens.add(label);
            }
        }
        extentTokens.add(GenericDataSetReader.initCoreLabel("."));
        // The constraint covers the extent only: after the added prefix, before the final ".".
        ParserConstraint constraint = new ParserConstraint(addedWords, extentTokens.size() - 1, ".*");
        List<ParserConstraint> constraints = Collections.singletonList(constraint);
        Tree tree = this.parse(extentTokens, constraints);
        if (tree == null) {
            // Null already means "no head found" for callers, so report it that way
            // instead of failing with a NullPointerException below.
            this.logger.severe("GenericDataSetReader: WARNING: Local parse produced no tree for mention: " + ent);
            return null;
        }
        if (fineEnabled) {
            this.logger.fine("No exact match found. Local parse:\n" + tree.pennString());
        }
        GenericDataSetReader.convertToCoreLabels(tree);
        // Shift indices so that the first extent token lines up with its sentence position.
        tree.indexSpans(extentStart - addedWords);
        Tree subtree = this.findPartialSpan(tree, extentStart);
        Tree extentHead = this.safeHead(subtree);
        this.logger.fine("Head is: " + extentHead);
        assert (extentHead != null);
        CoreLabel l = (CoreLabel)extentHead.label();
        Tree realHead = this.funkyFindLeafWithApproximateSpan(root, l.value(), (Integer)l.get(CoreAnnotations.BeginIndexAnnotation.class), approximateness);
        if (realHead != null) {
            this.logger.fine("Chosen head: " + realHead);
        }
        return realHead;
    }

    /**
     * Descends from {@code current} to the first node whose begin index equals {@code start}.
     *
     * <p>At each level the search continues into the first child whose span
     * {@code [begin, end)} contains {@code start}.
     *
     * @param current node at which the search starts; labels must be indexed {@code CoreLabel}s
     * @param start the begin index to look for
     * @return the first node on the descent path whose begin index is {@code start}
     * @throws RuntimeException if a node is reached that neither starts at {@code start}
     *         nor has a child covering it
     */
    private Tree findPartialSpan(Tree current, int start) {
        Tree node = current;
        while (true) {
            CoreLabel nodeLabel = (CoreLabel)node.label();
            int nodeBegin = (Integer)nodeLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
            if (nodeBegin == start) {
                this.logger.fine("findPartialSpan: Returning " + node);
                return node;
            }
            // Pick the first child whose span covers the wanted position.
            Tree covering = null;
            for (Tree child : node.children()) {
                CoreLabel childLabel = (CoreLabel)child.label();
                int childBegin = (Integer)childLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
                int childEnd = (Integer)childLabel.get(CoreAnnotations.EndIndexAnnotation.class);
                if (childBegin <= start && start < childEnd) {
                    covering = child;
                    break;
                }
            }
            if (covering == null) {
                throw new RuntimeException("Shouldn't happen: " + start + " " + node);
            }
            node = covering;
        }
    }

    /**
     * Finds the first leaf of {@code root} that has the given value and whose begin index
     * lies in {@code [index, index + approximateness]}.
     *
     * @param root tree whose leaves are searched, in order
     * @param token the leaf value to match
     * @param index the smallest acceptable begin index
     * @param approximateness how far beyond {@code index} the begin index may lie
     * @return the matching leaf, or null (after logging a warning) if there is none
     */
    private Tree funkyFindLeafWithApproximateSpan(Tree root, String token, int index, int approximateness) {
        // Dumping the whole tree is expensive; skip it unless FINE messages are published.
        if (this.logger.isLoggable(Level.FINE)) {
            this.logger.fine("Looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString());
        }
        List<Tree> leaves = root.getLeaves();
        for (Tree leaf : leaves) {
            CoreLabel label = CoreLabel.class.cast(leaf.label());
            Integer ind = label.get(CoreAnnotations.BeginIndexAnnotation.class);
            if (ind == null) {
                // A leaf without a begin index cannot be matched by position.
                continue;
            }
            boolean withinWindow = ind >= index && ind <= index + approximateness;
            if (withinWindow && token.equals(leaf.value())) {
                return leaf;
            }
        }
        this.logger.severe("GenericDataSetReader: WARNING: Failed to find head token");
        this.logger.severe("  when looking for " + token + " at pos " + index + " plus upto " + approximateness + " in tree: " + root.pennString());
        return null;
    }

    /**
     * Older head-finding strategy, used when the newer one is switched off.
     *
     * <p>If a constituent of {@code root} spans exactly the mention's extent, its head
     * terminal is returned. Otherwise the extent tokens are parsed on their own, the local
     * parse is indexed starting at the extent's first token, and the node of {@code root}
     * with the same span as the local head is returned.
     *
     * @param ent the mention whose head is wanted
     * @param root the parse tree of the full sentence, with index spans
     * @param tokens the tokens of the full sentence
     * @return the head node; may be null when assertions are disabled and no node of
     *         {@code root} has the local head's span
     */
    public Tree originalFindSyntacticHead(EntityMention ent, Tree root, List<CoreLabel> tokens) {
        this.logger.fine("Searching for tree matching " + ent);
        Tree spanning = GenericDataSetReader.findTreeWithSpan(root, ent.getExtentTokenStart(), ent.getExtentTokenEnd());
        if (spanning != null) {
            this.logger.fine("Mention \"" + ent + "\" mapped to tree: " + GenericDataSetReader.printTree(spanning));
            return this.safeHead(spanning);
        }

        // No constituent matches the extent: parse the extent tokens in isolation.
        ArrayList<CoreLabel> mentionTokens = new ArrayList<CoreLabel>();
        int position = ent.getExtentTokenStart();
        while (position < ent.getExtentTokenEnd()) {
            mentionTokens.add(tokens.get(position));
            ++position;
        }
        Tree localParse = this.parse(mentionTokens);
        this.logger.fine("No exact match found. Local parse:\n" + localParse.pennString());
        GenericDataSetReader.convertToCoreLabels(localParse);
        localParse.indexSpans(ent.getExtentTokenStart());

        Tree localHead = this.safeHead(localParse);
        assert (localHead != null);
        CoreLabel headLabel = (CoreLabel)localHead.label();
        int headBegin = (Integer)headLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
        int headEnd = (Integer)headLabel.get(CoreAnnotations.EndIndexAnnotation.class);
        Tree headInSentence = GenericDataSetReader.findTreeWithSpan(root, headBegin, headEnd);
        assert (headInSentence != null);
        return headInSentence;
    }

    /**
     * Builds a {@code CoreLabel} whose word, value, text annotation and value annotation
     * are all set to the given token.
     *
     * @param token the surface string for the new label
     * @return a new label initialised from {@code token}
     */
    private static CoreLabel initCoreLabel(String token) {
        final CoreLabel fresh = new CoreLabel();
        // Via the convenience setters ...
        fresh.setWord(token);
        fresh.setValue(token);
        // ... and explicitly on the underlying annotation keys.
        fresh.set(CoreAnnotations.TextAnnotation.class, token);
        fresh.set(CoreAnnotations.ValueAnnotation.class, token);
        return fresh;
    }

    /**
     * Parses a sentence given as plain strings.
     *
     * @param tokens the words of the sentence, in order
     * @return the tree produced by {@code parse(List)} for labels built from the words
     */
    protected Tree parseStrings(List<String> tokens) {
        ArrayList<CoreLabel> asLabels = new ArrayList<CoreLabel>();
        for (String word : tokens) {
            asLabels.add(GenericDataSetReader.initCoreLabel(word));
        }
        Tree parsed = this.parse(asLabels);
        return parsed;
    }

    /**
     * Parses the given tokens without parser constraints by delegating to
     * {@code parse(tokens, null)}.
     *
     * @param tokens the tokens of the sentence to parse
     * @return the tree returned by the two-argument overload
     */
    protected Tree parse(List<CoreLabel> tokens) {
        return this.parse(tokens, null);
    }

    /**
     * Parses one sentence with the parser annotator returned by {@code getParser()}.
     *
     * <p>The tokens and constraints are stored on a fresh sentence annotation, which is
     * wrapped as the only sentence of a fresh document; after annotation the tree of the
     * document's first sentence is returned.
     *
     * @param tokens the tokens of the sentence to parse
     * @param constraints parser constraints to attach to the sentence; stored as given,
     *        including null
     * @return the tree annotation of the first sentence after parsing
     */
    protected Tree parse(List<CoreLabel> tokens, List<ParserConstraint> constraints) {
        Annotation sentence = new Annotation("");
        sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
        sentence.set(ParserAnnotations.ConstraintAnnotation.class, constraints);

        List<CoreMap> singleSentence = new ArrayList<CoreMap>();
        singleSentence.add(sentence);
        Annotation document = new Annotation("");
        document.set(CoreAnnotations.SentencesAnnotation.class, singleSentence);

        this.getParser().annotate(document);

        // Read the sentences back from the document rather than reusing the local list.
        List<CoreMap> annotated = document.get(CoreAnnotations.SentencesAnnotation.class);
        return annotated.get(0).get(TreeCoreAnnotations.TreeAnnotation.class);
    }

    /**
     * Searches the tree depth-first for the first node whose begin and end indices equal
     * {@code start} and {@code end}.
     *
     * <p>A subtree is pruned when its root is indexed and {@code end} is below its begin
     * index or {@code start} is at or beyond its end index. Nodes without both index
     * annotations are never matched or pruned, but their children are still searched.
     *
     * @param tree root of the subtree to search; its label must be a {@code CoreLabel} or null
     * @param start wanted begin index
     * @param end wanted end index
     * @return the first matching node, or null if there is none
     */
    private static Tree findTreeWithSpan(Tree tree, int start, int end) {
        CoreLabel nodeLabel = (CoreLabel)tree.label();
        boolean indexed = nodeLabel != null
                && nodeLabel.has(CoreAnnotations.BeginIndexAnnotation.class)
                && nodeLabel.has(CoreAnnotations.EndIndexAnnotation.class);
        if (indexed) {
            int nodeStart = (Integer)nodeLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
            int nodeEnd = (Integer)nodeLabel.get(CoreAnnotations.EndIndexAnnotation.class);
            if (nodeStart == start && nodeEnd == end) {
                return tree;
            }
            // The wanted span lies outside this node, so no descendant can match.
            if (end < nodeStart || start >= nodeEnd) {
                return null;
            }
        }
        for (Tree child : tree.children()) {
            if (child == null) {
                continue;
            }
            Tree found = GenericDataSetReader.findTreeWithSpan(child, start, end);
            if (found != null) {
                return found;
            }
        }
        return null;
    }
}

