package com.gemantic.parser.extractor.impl;

import com.gemantic.parser.extractor.ArticleExtractor;
import com.gemantic.parser.htmltree.GRHtmlTree;
import com.gemantic.parser.model.Article;
import com.gemantic.parser.model.Paragraph;
import com.gemantic.parser.rule.TagRule;
import com.gemantic.parser.util.BaseUtil;
import java.util.List;
import org.apache.log4j.Logger;
import org.htmlcleaner.TagNode;

/* loaded from: input_file:com/gemantic/parser/extractor/impl/ArticleTitleReExtractor.class */
/**
 * Second-chance title extractor.
 *
 * <p>Runs only for articles whose page type is {@code 1}, that have no title position yet
 * ({@code getTitlePos() <= 0}) but do have both a content position and a site-category
 * position. It scans paragraph node ids backwards, from just before the time/source node
 * (or just before the content node when no time position is known) down to the id returned
 * by {@code GRHtmlTree.getMaxID} for the site-category item, and records the first paragraph
 * that passes {@link #chooseTitle} as the article title.
 */
public class ArticleTitleReExtractor implements ArticleExtractor {
    private static final Logger logger = Logger.getLogger(ArticleTitleReExtractor.class);

    /** Trimmed candidate text shorter than this is rejected. */
    private static final int MIN_TITLE_LENGTH = 7;
    /** Trimmed candidate text longer than this is rejected. */
    private static final int MAX_TITLE_LENGTH = 35;
    /** Candidates whose link-word-length / word-count ratio exceeds this are rejected. */
    private static final double MAX_LINK_RATIO = 0.2d;
    /** Candidates whose tag count reaches this value are rejected. */
    private static final int TAG_COUNT_LIMIT = 2;
    /** Candidates whose {@code BaseUtil.EngCharStat} count reaches this share of the length are rejected. */
    private static final double ENGLISH_RATIO_LIMIT = 0.5d;

    /** Local switch for diagnostic logging; messages are emitted only if log4j DEBUG is enabled too. */
    boolean debug = true;
    private TagRule tagrule = TagRule.getInstance();

    /**
     * Tries to fill in a missing title on {@code article}; the article is modified in place.
     *
     * @param article    article to complete; ignored when {@code null}
     * @param tagNode    not used by this extractor
     * @param gRHtmlTree tree used to resolve the lower bound of the scan; ignored when {@code null}
     * @param list       candidate paragraphs (elements are expected to be {@link Paragraph})
     * @return always {@code null}; the result is communicated through {@code article.setTitle}
     */
    @Override
    public Article extract(Article article, TagNode tagNode, GRHtmlTree gRHtmlTree, List list) {
        if (article == null || gRHtmlTree == null) {
            // Nothing to work with; previously this path ended in a NullPointerException.
            return null;
        }
        boolean eligible = article.getPageType() == 1
                && article.getTitlePos() <= 0
                && article.getContentPos() > 0
                && article.getSiteCatPos() > 0;
        if (eligible) {
            reExtractTitle(article, gRHtmlTree, list);
        }
        return null;
    }

    /**
     * Scans paragraph node ids backwards between the computed upper and lower bounds and
     * stops at the first paragraph accepted by {@link #chooseTitle}.
     *
     * @return {@code true} if a title was set on the article
     */
    private boolean reExtractTitle(Article article, GRHtmlTree gRHtmlTree, List list) {
        boolean debugOn = isDebugLogging();
        int siteCatPos = article.getSiteCatPos();
        int timePos = article.getTimePos();
        if (debugOn) {
            logger.debug("article.getTimePos()=" + timePos + ";-->article.getSiteCatPos()=" + siteCatPos);
        }

        // Lower bound of the scan: id reported by the tree for the site-category item.
        int lowerBound = -1;
        if (siteCatPos > 0) {
            lowerBound = gRHtmlTree.getMaxID(gRHtmlTree.getItemByIndex(siteCatPos), -1);
            if (debugOn) {
                logger.debug("sitecat_begin=" + lowerBound);
            }
        }

        // Upper bound: the time node, or the earlier of time/source when both follow the
        // site-category node. -1 means "unknown", in which case the content node is used.
        int upperBound = timePos > 0 ? timePos : -1;
        if (siteCatPos > 0 && timePos > siteCatPos) {
            int sourcePos = article.getSourcePos();
            if (sourcePos > siteCatPos) {
                upperBound = Math.min(sourcePos, timePos);
            }
        }
        if (debugOn) {
            logger.debug("time_begin=" + upperBound);
        }

        if (lowerBound <= 0) {
            return false;
        }
        int start = (upperBound > 0 ? upperBound : article.getContentPos()) - 1;
        for (int nodeId = start; nodeId >= lowerBound; nodeId--) {
            if (chooseTitle(article, findParagraph(list, nodeId))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the first paragraph in {@code list} whose node id equals {@code i}.
     *
     * @return the matching paragraph, or {@code null} when the list is {@code null} or has no match
     */
    private Paragraph findParagraph(List list, int i) {
        if (list == null) {
            return null;
        }
        for (Object element : list) {
            // instanceof also filters out null entries, which previously caused an NPE.
            if (element instanceof Paragraph) {
                Paragraph paragraph = (Paragraph) element;
                if (paragraph.getNodeID() == i) {
                    return paragraph;
                }
            }
        }
        return null;
    }

    /**
     * Decides whether {@code paragraph} looks like a title and, if so, stores its trimmed
     * text and node id on {@code article}.
     *
     * <p>A paragraph is rejected when its trimmed text is outside
     * [{@value #MIN_TITLE_LENGTH}, {@value #MAX_TITLE_LENGTH}] characters, its link ratio is
     * above {@value #MAX_LINK_RATIO}, it has {@value #TAG_COUNT_LIMIT} or more tags, the
     * {@code BaseUtil.EngCharStat} count reaches half of the text length, the
     * {@code TagRule.CDotStatStrit} count is positive, or the text contains a line feed.
     *
     * @return {@code true} if the paragraph was accepted and the title was set
     */
    private boolean chooseTitle(Article article, Paragraph paragraph) {
        if (paragraph == null) {
            return false;
        }
        String rawContent = paragraph.getContentS();
        if (rawContent == null) {
            // A paragraph without text cannot be a title.
            return false;
        }
        String candidate = rawContent.trim();
        int length = candidate.length();

        // Word counts of 2 or less are treated as 1 so tiny paragraphs do not skew the ratio.
        double wordCount = paragraph.getWordCNum() > 2 ? paragraph.getWordCNum() : 1.0d;
        double linkRatio = paragraph.getLinkWordLen() / wordCount;

        if (length < MIN_TITLE_LENGTH || length > MAX_TITLE_LENGTH
                || linkRatio > MAX_LINK_RATIO || paragraph.getTagNum() >= TAG_COUNT_LIMIT) {
            return false;
        }

        // NOTE(review): presumably a count of English characters and of sentence
        // punctuation respectively; confirm against BaseUtil / TagRule.
        int engCharCount = BaseUtil.EngCharStat(candidate);
        int dotCount = this.tagrule.CDotStatStrit(candidate);
        if (isDebugLogging()) {
            logger.debug("EE:nArrayId=" + paragraph.getNodeID() + ";node=" + paragraph.getTagName() + "; length=" + length + "; echarcount=" + engCharCount + ";BlockContent=" + candidate);
        }

        boolean rejected = engCharCount >= ENGLISH_RATIO_LIMIT * length
                || dotCount > 0
                || candidate.indexOf('\n') >= 0;
        if (rejected) {
            return false;
        }
        article.setTitle(candidate, paragraph.getNodeID());
        return true;
    }

    /** Diagnostic strings are built only when both the local flag and log4j DEBUG are on. */
    private boolean isDebugLogging() {
        return this.debug && logger.isDebugEnabled();
    }
}
