package com.gemantic.parser.util;

import com.gemantic.parser.model.Article;
import com.gemantic.parser.model.Paragraph;
import java.util.regex.Pattern;

/* loaded from: input_file:com/gemantic/parser/util/ArticleUtil.class */
/**
 * Static helpers for aggregating the paragraphs of an {@link Article}, normalising its
 * plain-text and tagged content, and dumping its fields for debugging.
 */
public class ArticleUtil {
    /** Markup inserted wherever one paragraph ends and the next one begins. */
    private static final String PARAGRAPH_BREAK = "</p><p>";
    /** A paragraph without any content; removed from the final markup. */
    private static final String EMPTY_PARAGRAPH = "<p></p>";
    private static final String P_OPEN = "<p>";
    private static final String P_CLOSE = "</p>";
    private static final String LINE_BREAK = "\n";

    // Regexes are compiled once instead of on every String.replaceAll call.
    /** The four spellings of a line-break tag that are recognised (case-sensitive). */
    private static final Pattern BR_PATTERN = Pattern.compile("(<br>)|(<br/>)|(<br />)|(<br/ >)");
    /** An opening tag followed by whitespace and/or ideographic spaces (U+3000). */
    private static final Pattern P_LEADING_SPACE_PATTERN = Pattern.compile("<p>[\u3000\\s]+");
    private static final Pattern STRONG_LEADING_SPACE_PATTERN = Pattern.compile("<strong>[\u3000\\s]+");
    private static final Pattern B_LEADING_SPACE_PATTERN = Pattern.compile("<b>[\u3000\\s]+");
    /** One or more consecutive newline characters. */
    private static final Pattern MULTI_NEWLINE_PATTERN = Pattern.compile("\\n+");

    /**
     * Builds a one-line summary of the link statistics stored in the article's summed paragraph.
     *
     * @param article the article whose {@code getParagraphSum()} is read
     * @return the summary line (terminated by {@code "; <br>\n"}), or an empty string when the
     *         article has no summed paragraph
     */
    public static String getArticleStatInfo(Article article) {
        Paragraph paragraphSum = article.getParagraphSum();
        if (paragraphSum == null) {
            return "";
        }
        int linkCount = paragraphSum.getANum();
        int linkWordLen = paragraphSum.getLinkWordLen();
        // The reported count stays untouched; only the divisor is clamped to avoid dividing by zero.
        int divisor = linkCount <= 0 ? 1 : linkCount;
        return "链接数=" + linkCount
                + ";链接的字数=" + linkWordLen
                + "; 平均链接长度=" + (linkWordLen / divisor)
                + "; <br>\n";
    }

    /**
     * Folds every paragraph of the article into a single summary paragraph and derives the
     * article's content fields from it.
     *
     * <p>All numeric counters are added up and the content / tagged content of each paragraph is
     * appended in order. The plain content is then passed through {@link #replaceMultiEnter} and
     * the tagged content through {@link #replaceBRwithP}; both are additionally passed through
     * {@code RegexUtil.replaceStockCode} (defined elsewhere). If the article consists of exactly
     * one paragraph whose tagged content contains no {@code <p>}, that content is wrapped in a
     * single {@code <p>...</p>} pair. Finally the summary paragraph is stored on the article.
     *
     * @param article the article to update in place
     */
    public static void sumArticleParagraph(Article article) {
        Paragraph total = new Paragraph();
        for (int i = 0; i < article.getParagraphs().size(); i++) {
            Paragraph part = (Paragraph) article.getParagraphs().get(i);
            total.setWordNum(total.getWordNum() + part.getWordNum());
            total.setWordCNum(total.getWordCNum() + part.getWordCNum());
            total.setDotCNum(total.getDotCNum() + part.getDotCNum());
            total.setLinkNum(total.getLinkNum() + part.getLinkNum());
            total.setANum(total.getANum() + part.getANum());
            total.setImgNum(total.getImgNum() + part.getImgNum());
            total.setLinkWordLen(total.getLinkWordLen() + part.getLinkWordLen());
            total.setTagNum(total.getTagNum() + part.getTagNum());
            total.appendContent(part.getContent());
            total.appendContentTag(part.getContentTag());
        }

        article.setContent(replaceMultiEnter(total.getContent().toString()));
        article.setContent(RegexUtil.replaceStockCode(article.getContent()));

        article.setContentTag(replaceBRwithP(total.getContentTag().toString()));
        article.setContentTag(RegexUtil.replaceStockCode(article.getContentTag()));

        // A lone paragraph that produced no <p> markup at all still gets wrapped in one.
        if (article.getParagraphs().size() == 1 && article.getContentTag().indexOf(P_OPEN) < 0) {
            article.setContentTag(P_OPEN + article.getContentTag() + P_CLOSE);
        }
        article.setParagraphSum(total);
    }

    /**
     * Collapses every run of newlines into a single newline and replaces each ideographic space
     * (U+3000) with an ASCII space.
     *
     * @param str the text to normalise; must not be {@code null}
     * @return the normalised text
     */
    public static String replaceMultiEnter(String str) {
        return MULTI_NEWLINE_PATTERN.matcher(str).replaceAll(LINE_BREAK).replace('\u3000', ' ');
    }

    /**
     * Converts line-break tags and newlines into paragraph boundaries and tidies the result.
     *
     * <p>Steps, in order: each recognised {@code <br>} variant and each newline becomes
     * {@code </p><p>}; a leading {@code <p>} is added when the first closing tag has no opening
     * tag before it; a trailing {@code </p>} is added when the last opening tag is never closed;
     * the text is trimmed and a trailing empty paragraph removed; whitespace directly after
     * {@code <p>}, {@code <strong>} and {@code <b>} is stripped; remaining empty paragraphs are
     * dropped; and a newline is placed after every {@code </p>}.
     *
     * @param str the tagged content to convert; must not be {@code null}
     * @return the converted markup
     */
    public static String replaceBRwithP(String str) {
        String html = BR_PATTERN.matcher(str).replaceAll(PARAGRAPH_BREAK)
                .replace(LINE_BREAK, PARAGRAPH_BREAK);

        int firstOpen = html.indexOf(P_OPEN);
        int firstClose = html.indexOf(P_CLOSE);
        // indexOf yields -1 for "not found", so the two indices cannot simply be compared:
        // an opening tag is missing only when a closing tag exists and nothing opens before it.
        // (A plain "firstOpen > firstClose" doubles the tag for input like "<p>text" and
        // misses input like "text</p>".)
        if (firstClose >= 0 && (firstOpen < 0 || firstOpen > firstClose)) {
            html = P_OPEN + html;
        }
        // The last opening tag is unclosed when no closing tag follows it; -1 for a missing
        // closing tag compares correctly here.
        if (html.lastIndexOf(P_OPEN) > html.lastIndexOf(P_CLOSE)) {
            html = html + P_CLOSE;
        }

        String trimmed = html.trim();
        if (trimmed.endsWith(EMPTY_PARAGRAPH)) {
            trimmed = trimmed.substring(0, trimmed.length() - EMPTY_PARAGRAPH.length());
        }

        String result = P_LEADING_SPACE_PATTERN.matcher(trimmed).replaceAll(P_OPEN);
        result = result.replace(EMPTY_PARAGRAPH, "");
        result = STRONG_LEADING_SPACE_PATTERN.matcher(result).replaceAll("<strong>");
        result = B_LEADING_SPACE_PATTERN.matcher(result).replaceAll("<b>");
        return result.replace(P_CLOSE, P_CLOSE + LINE_BREAK);
    }

    /**
     * Prints the article's fields to standard output using the index {@code 1} in every label.
     *
     * @param article the article to print
     */
    public static void printArticleInfo(Article article) {
        printArticleInfo(1, article);
    }

    /**
     * Prints the article's fields to standard output, one {@code <Label><i>=<value>} line each.
     *
     * @param i       the index appended to every label
     * @param article the article to print
     */
    public static void printArticleInfo(int i, Article article) {
        printField("Title", i, article.getTitle());
        printField("TitlePos", i, article.getTitlePos());
        printField("Time", i, article.getTime());
        printField("TimePos", i, article.getTimePos());
        printField("TimeBak", i, article.getTimeBak());
        printField("SiteCat", i, article.getSiteCat());
        printField("SiteCatPos", i, article.getSiteCatPos());
        printField("Source", i, article.getSource());
        printField("SourcePos", i, article.getSourcePos());
        printField("PageType", i, article.getPageTypeName());
        printField("ContentPos", i, article.getContentPos());
        printField("Content", i, article.getContent());
        printField("ContentTag", i, article.getContentTag());
    }

    /** Writes a single {@code label + index + "=" + value} line to standard output. */
    private static void printField(String label, int index, Object value) {
        System.out.println(label + index + "=" + value);
    }
}
