package com.gemantic.parser.extractor.impl;

import com.gemantic.parser.extractor.impl.ArticleTitleExtractor;
import com.gemantic.parser.htmltree.GRHtmlTree;
import com.gemantic.parser.model.Article;
import com.gemantic.parser.model.Paragraph;
import com.gemantic.parser.util.BaseUtil;
import com.gemantic.parser.util.RegexUtil;
import com.gemantic.parser.util.TagNodeUtil;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.log4j.Logger;
import org.htmlcleaner.TagNode;

/* loaded from: input_file:com/gemantic/parser/extractor/impl/PolicyTitleExtractor.class */
/**
 * {@link ArticleTitleExtractor} variant that selects the article title from the
 * parsed paragraph list by matching paragraph text against the page's HTML title
 * and against the anchor text stored on the article.
 */
public class PolicyTitleExtractor extends ArticleTitleExtractor {
    private static final Logger logger = Logger.getLogger(PolicyTitleExtractor.class);

    /** Shortest trimmed paragraph text (in chars) accepted as a title candidate. */
    private static final int MIN_TITLE_LENGTH = 4;

    /** Longest trimmed paragraph text (in chars) accepted as a title candidate. */
    private static final int MAX_TITLE_LENGTH = 100;

    /** Score added to a candidate for each matching rule (HTML title, anchor). */
    private static final int MATCH_BONUS = 100;

    /**
     * Stores the HTML title taken from {@code tagNode} on the article and then
     * tries to pick a title from the paragraph list.
     *
     * @param article    article to populate; returned unchanged in identity
     * @param tagNode    node passed to {@code TagNodeUtil.extractHtmlTitle}
     * @param gRHtmlTree forwarded to {@link #extractTitle}; not used by this class
     * @param list       raw list whose elements are cast to {@link Paragraph}
     * @return the same {@code article} instance, whether or not a title was found
     */
    @Override
    public Article extract(Article article, TagNode tagNode, GRHtmlTree gRHtmlTree, List list) {
        article.setHtmlTitle(TagNodeUtil.extractHtmlTitle(tagNode));
        // The boolean outcome is intentionally not propagated: callers only get the article.
        extractTitle(article, gRHtmlTree, list);
        return article;
    }

    /**
     * Picks the best-scoring paragraph as the article title.
     *
     * <p>A paragraph is a candidate when its trimmed text is between
     * {@value #MIN_TITLE_LENGTH} and {@value #MAX_TITLE_LENGTH} characters, it has
     * no links or is an {@code h1}, and its normalised text either occurs inside
     * the HTML title or contains the article's anchor text (with any {@code "..."}
     * removed). Each satisfied rule adds {@value #MATCH_BONUS} to the score before
     * {@code calHtmlNodeItemScore} is applied and the candidates are sorted.
     *
     * @param article    article providing the HTML title and anchor; receives the title
     * @param gRHtmlTree unused here; kept for the overridden signature
     * @param list       raw list whose elements are cast to {@link Paragraph}
     * @return {@code true} if a title was set on the article, {@code false} if the
     *         HTML title is {@code null} or no paragraph qualified
     */
    @Override
    protected boolean extractTitle(Article article, GRHtmlTree gRHtmlTree, List list) {
        if (article.getHtmlTitle() == null) {
            return false;
        }
        String htmlTitle = BaseUtil.unescapeHtml(article.getHtmlTitle());
        String anchor = BaseUtil.unescapeHtml(article.getAnchor());
        // Literal replacement: no need to compile a regex just to drop "...".
        String anchorText = anchor == null ? "" : anchor.replace("...", "");

        List<HtmlNodeItem> candidates = new ArrayList<HtmlNodeItem>();
        for (Object element : list) {
            Paragraph paragraph = (Paragraph) element;
            String content = paragraph.getContentS();
            if (content == null) {
                // Nothing to match against; previously this would have thrown.
                continue;
            }
            String text = content.trim();
            if (text.length() < MIN_TITLE_LENGTH || text.length() > MAX_TITLE_LENGTH) {
                continue;
            }
            boolean linkFree = paragraph.getLinkNum() < 1;
            if (!linkFree && !"h1".equals(paragraph.getTagName())) {
                continue;
            }

            // NOTE(review): replaceDateTime presumably strips date/time text - confirm.
            String normalized = RegexUtil.replaceDateTime(BaseUtil.unescapeHtml(text));
            if (normalized == null || normalized.length() == 0) {
                // indexOf("") is always 0, so an empty needle would match every HTML
                // title and promote a paragraph with no remaining text to a title.
                continue;
            }

            boolean inHtmlTitle = htmlTitle.indexOf(normalized) >= 0;
            boolean containsAnchor = anchorText.length() > 0 && normalized.indexOf(anchorText) >= 0;
            if (!inHtmlTitle && !containsAnchor) {
                continue;
            }

            // The item keeps the trimmed original text, not the normalised form.
            HtmlNodeItem item = new HtmlNodeItem(text, paragraph.getTagName(), paragraph.getNodeID());
            if (inHtmlTitle) {
                item.setScore(item.getScore() + MATCH_BONUS);
            }
            if (containsAnchor) {
                item.setScore(item.getScore() + MATCH_BONUS);
            }
            candidates.add(item);
        }

        // Scored in a separate pass, after all candidates are collected, as before.
        for (HtmlNodeItem item : candidates) {
            calHtmlNodeItemScore(item);
        }
        Collections.sort(candidates, new ArticleTitleExtractor.HtmlNodeItemCompare(this));

        if (logger.isDebugEnabled()) {
            // Guarded so the list is not stringified when debug output is disabled.
            logger.debug("==title node txt==\n" + candidates.toString());
        }

        if (candidates.isEmpty()) {
            return false;
        }
        // First element after sorting is taken as the winner.
        HtmlNodeItem best = candidates.get(0);
        article.setTitle(best.getTagValue(), best.getPos());
        return true;
    }
}
