package com.gemantic.parser.check.impl;

import com.gemantic.parser.check.ArticleChecker;
import com.gemantic.parser.impl.NewsParser;
import com.gemantic.parser.model.Article;
import com.gemantic.parser.model.Paragraph;
import com.gemantic.parser.rule.TagRule;
import com.gemantic.parser.util.ArticleUtil;
import com.gemantic.parser.util.BaseUtil;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/gemantic/parser/check/impl/ArticleContentChecker.class */
/**
 * {@link ArticleChecker} that trims unwanted text from the first and last
 * paragraphs of an {@link Article}.
 *
 * <p>The cleanup runs in this order: trailing text after the last sentence
 * terminator, a trailing disclaimer paragraph, trailing text again, a leading
 * Sina notice paragraph, and finally leading short paragraphs / lead-in text.
 * Articles whose host is {@code null} are returned untouched.
 *
 * <p>A {@code null} paragraph list and {@code null} paragraph content are
 * tolerated everywhere: such paragraphs are skipped (or removed where the
 * original logic already removed empty paragraphs).
 */
public class ArticleContentChecker implements ArticleChecker {
    private static final Logger logger = Logger.getLogger(ArticleContentChecker.class);

    /**
     * Host fragment mapped to a disclaimer marker. It is filled by the static
     * initializer below; nothing in this class reads it.
     */
    private static final HashMap<String, String> hm_copyright = new HashMap<>();

    /** Literal marker ("(Hexun Finance original)") stripped from hexun/baidu-stock articles. */
    private static final String HEXUN_ORIGINAL_MARK = "（和讯财经原创）";

    static {
        hm_copyright.put("hexun.com", "声明】");
        hm_copyright.put("sina.com.cn", "声明：");
        hm_copyright.put("ifeng.com", "免责声明：");
    }

    /** When true, the paragraph contents inspected by {@link #cutFirstWasteWord} are logged at debug level. */
    boolean debug = false;
    private TagRule tagrule = TagRule.getInstance();

    /**
     * Runs every cleanup step on the article and then refreshes its derived data
     * through {@code ArticleUtil.sumArticleParagraph} and {@code NewsParser.htmlPageType}.
     *
     * @param article the article to clean; modified in place
     * @return the cleaned article, or the unmodified argument when its host is {@code null}
     */
    @Override // com.gemantic.parser.check.ArticleChecker
    public Article check(Article article) {
        String host = article.getHost();
        if (host == null) {
            return article;
        }
        Article cleaned = cutLastWasteWord(article);
        cleaned = cutLastCopyRight(cleaned);
        // A removed disclaimer can expose a new last paragraph, so trim the tail again.
        cleaned = cutLastWasteWord(cleaned);
        cleaned = cutFirstCopyRight(cleaned);
        cleaned = cutFirstWasteWord(cleaned);
        if (host.contains("hexun.com") || host.contains("stock.baidu.com")) {
            removeHexunOriginalMark(cleaned);
        }
        ArticleUtil.sumArticleParagraph(cleaned);
        NewsParser.htmlPageType(cleaned);
        return cleaned;
    }

    /** Deletes every occurrence of {@link #HEXUN_ORIGINAL_MARK} from both the plain and the tagged content. */
    private void removeHexunOriginalMark(Article article) {
        List<?> paragraphs = article.getParagraphs();
        if (paragraphs == null) {
            return;
        }
        for (int index = 0; index < paragraphs.size(); index++) {
            Paragraph paragraph = (Paragraph) paragraphs.get(index);
            String content = paragraph.getContentS();
            if (content != null) {
                // Plain literal replacement; no regex is needed for a fixed marker.
                paragraph.resetContentNew(content.replace(HEXUN_ORIGINAL_MARK, ""));
            }
            String taggedContent = paragraph.getContentTagS();
            if (taggedContent != null) {
                paragraph.resetContentTagNew(taggedContent.replace(HEXUN_ORIGINAL_MARK, ""));
            }
        }
    }

    /**
     * Walks the paragraphs from the end. Paragraphs that are null/empty or contain
     * none of the terminators "。", "！", "." or "!" are removed; the first paragraph
     * that does contain one is truncated right after its last terminator and the
     * walk stops.
     *
     * @param article the article to trim in place
     * @return the same article instance
     */
    private Article cutLastWasteWord(Article article) {
        List<?> paragraphs = article.getParagraphs();
        if (paragraphs == null) {
            return article;
        }
        for (int index = paragraphs.size() - 1; index >= 0; index--) {
            Paragraph paragraph = (Paragraph) paragraphs.get(index);
            String content = paragraph.getContentS();
            if (content != null && !content.isEmpty()) {
                int lastTerminator = Math.max(
                        Math.max(content.lastIndexOf("。"), content.lastIndexOf("！")),
                        Math.max(content.lastIndexOf("."), content.lastIndexOf("!")));
                if (lastTerminator >= 0) {
                    // Every terminator is a single char, so +1 keeps the terminator itself.
                    String kept = content.substring(0, lastTerminator + 1);
                    paragraph.resetContentNew(kept);
                    // NOTE(review): the tagged content is overwritten with plain text here,
                    // as in the original code - confirm that dropping the tags is intended.
                    paragraph.resetContentTagNew(kept);
                    break;
                }
            }
            article.removeParagraph(paragraph);
        }
        return article;
    }

    /**
     * Inspects at most the last four paragraphs, from the end, and removes the first
     * one found that contains "声明】" or "声明：" (a "statement" marker).
     *
     * @param article the article to trim in place
     * @return the same article instance
     */
    private Article cutLastCopyRight(Article article) {
        List<?> paragraphs = article.getParagraphs();
        if (paragraphs == null) {
            return article;
        }
        int inspected = 0;
        for (int index = paragraphs.size() - 1; index >= 0 && inspected < 4; index--, inspected++) {
            Paragraph paragraph = (Paragraph) paragraphs.get(index);
            String content = paragraph.getContentS();
            if (content == null) {
                continue;
            }
            if (content.contains("声明】") || content.contains("声明：")) {
                article.removeParagraph(paragraph);
                break;
            }
        }
        return article;
    }

    /**
     * For hosts containing "sina.com", inspects at most the first three paragraphs
     * and removes the first one found that contains "新浪提示：" or "新浪声明：".
     *
     * @param article the article to trim in place
     * @return the same article instance
     */
    private Article cutFirstCopyRight(Article article) {
        String host = article.getHost();
        if (host == null || !host.contains("sina.com")) {
            return article;
        }
        List<?> paragraphs = article.getParagraphs();
        if (paragraphs == null) {
            return article;
        }
        for (int index = 0; index < paragraphs.size() && index <= 2; index++) {
            Paragraph paragraph = (Paragraph) paragraphs.get(index);
            String content = paragraph.getContentS();
            if (content == null) {
                continue;
            }
            if (content.contains("新浪提示：") || content.contains("新浪声明：")) {
                article.removeParagraph(paragraph);
                break;
            }
        }
        return article;
    }

    /**
     * Cleans the start of the article in two passes.
     *
     * <ol>
     *   <li>Among the first two paragraphs, a non-blank paragraph that is separated by
     *       a newline (it ends with one, or the next paragraph starts with one) and
     *       for which {@code TagRule.firstOfPunctation} returns a negative value is
     *       removed. The pass stops at the first separated paragraph for which the
     *       result is non-negative.</li>
     *   <li>Among the first three remaining paragraphs, the text up to and including
     *       a space, ideographic space, ')' or fullwidth ')' located at or before the
     *       {@code firstOfPunctation} index is cut off, provided the text before it is
     *       shorter than 30 chars and {@code BaseUtil.containEngChar} is false for it.
     *       The pass stops after the first paragraph that was cut.</li>
     * </ol>
     *
     * @param article the article to trim in place
     * @return the same article instance
     */
    private Article cutFirstWasteWord(Article article) {
        List<?> paragraphs = article.getParagraphs();
        if (paragraphs == null) {
            return article;
        }
        if (this.debug) {
            for (int index = 0; index < paragraphs.size(); index++) {
                logger.debug("paraContent" + index + "=" + ((Paragraph) paragraphs.get(index)).getContentS() + "**");
            }
        }

        // Pass 1: collect leading paragraphs to drop, then remove them in reverse order.
        List<Paragraph> toRemove = new ArrayList<>();
        for (int index = 0; index < paragraphs.size() && index < 2; index++) {
            Paragraph paragraph = (Paragraph) paragraphs.get(index);
            String content = paragraph.getContentS();
            if (this.debug) {
                logger.debug("in 111 paraContent" + index + "=" + content + "**");
            }
            if (content == null || content.trim().isEmpty()) {
                continue;
            }
            boolean separatedByNewline = content.endsWith("\n");
            if (!separatedByNewline && index < paragraphs.size() - 1) {
                String nextContent = ((Paragraph) paragraphs.get(index + 1)).getContentS();
                separatedByNewline = nextContent != null && nextContent.startsWith("\n");
            }
            if (!separatedByNewline) {
                continue;
            }
            if (this.tagrule.firstOfPunctation(content) >= 0) {
                break;
            }
            toRemove.add(paragraph);
            if (this.debug) {
                logger.debug("paraContent" + index + " will be deleted!!!");
            }
        }
        for (int index = toRemove.size() - 1; index >= 0; index--) {
            article.removeParagraph(toRemove.get(index));
        }

        // Pass 2: strip a short lead-in from the first paragraph that has one.
        boolean leadInCut = false;
        for (int index = 0; index < paragraphs.size() && index < 3 && !leadInCut; index++) {
            try {
                Paragraph paragraph = (Paragraph) paragraphs.get(index);
                String content = paragraph.getContentS();
                if (this.debug) {
                    logger.debug("in 222 paraContent" + index + "=" + content);
                }
                if (content == null) {
                    continue;
                }
                int firstPunctuation = this.tagrule.firstOfPunctation(content);
                // Scans backwards and keeps going after a hit, so the delimiter closest
                // to the start of the paragraph is the one that finally applies.
                for (int pos = firstPunctuation; pos > 0; pos--) {
                    char current = content.charAt(pos);
                    if (current == ' ' || current == '\u3000' || current == ')' || current == '\uFF09') {
                        // NOTE(review): this stops one char short of the delimiter
                        // (pos - 1, not pos); kept as in the original - confirm intent.
                        String leadIn = content.substring(0, pos - 1);
                        if (this.debug) {
                            logger.debug("cuts=" + leadIn + "**");
                        }
                        if (!BaseUtil.containEngChar(leadIn) && leadIn.length() < 30) {
                            String remainder = content.substring(pos + 1);
                            paragraph.resetContentNew(remainder);
                            // NOTE(review): tagged content is replaced by plain text here too.
                            paragraph.resetContentTagNew(remainder);
                            leadInCut = true;
                        }
                    }
                }
            } catch (Exception e) {
                // Best effort: a failure on one paragraph must not abort the whole check,
                // but it should be visible instead of being silently dropped.
                logger.warn("cutFirstWasteWord: failed to strip lead-in from paragraph " + index, e);
            }
        }
        return article;
    }
}
