package com.gemantic.parser.mytest;

import com.gemantic.parser.impl.NewsParser;
import com.gemantic.parser.model.Article;
import com.gemantic.parser.util.ArticleUtil;
import com.gemantic.parser.util.BaseUtil;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
import org.springframework.context.support.ClassPathXmlApplicationContext;

/* loaded from: input_file:com/gemantic/parser/mytest/TestExtractTitle.class */
/**
 * Manual regression harness for {@link NewsParser}.
 *
 * <p>{@link #main(String[])} parses a fixed set of saved HTML pages and compares
 * each parsed {@link Article} with a hand-written expected {@link Article}
 * registered by {@link #constructResult()}. Mismatches are logged and counted.
 */
public class TestExtractTitle {
    private static final Logger logger = Logger.getLogger(TestExtractTitle.class);

    /** Spring configuration from which the "NewsParser" bean is loaded. */
    private static final String PARSER_CONTEXT = "classpath:META-INF/applicationContext_parser.xml";

    /** Fixtures (source page plus expected article) in registration order. */
    private final List<SourceItem> list_item = new ArrayList<SourceItem>();

    /** Parser under test; stays {@code null} when {@link #init()} fails. */
    private NewsParser parser;

    /** Creates the harness with an empty fixture list and tries to load the parser. */
    public TestExtractTitle() {
        init();
    }

    /**
     * Loads the "NewsParser" bean from the Spring context.
     *
     * <p>Failures are logged (with stack trace) and otherwise ignored, so that
     * constructing the harness never throws; callers must check {@code parser}
     * for {@code null} before using it.
     */
    private void init() {
        try {
            this.parser = (NewsParser) new ClassPathXmlApplicationContext(PARSER_CONTEXT).getBean("NewsParser");
        } catch (Throwable th) {
            // Deliberately broad: any startup failure is reported through the logger
            // (including the stack trace) instead of stderr.
            logger.error("cant get spring " + PARSER_CONTEXT, th);
        }
    }

    /**
     * Runs every registered fixture through the parser and logs the number of
     * fixtures whose parsed article differs from the expected one.
     *
     * <p>Fixtures for which the parser returns {@code null} are logged but are
     * not added to the final count.
     *
     * @param strArr command-line arguments; not used
     */
    public static void main(String[] strArr) {
        TestExtractTitle testExtractTitle = new TestExtractTitle();
        if (testExtractTitle.parser == null) {
            // init() already logged the cause; without a parser nothing can be checked.
            logger.error("NewsParser bean is not available, aborting");
            return;
        }
        testExtractTitle.parser.setDebug(false);
        testExtractTitle.constructResult();

        int mismatchCount = 0;
        int total = testExtractTitle.list_item.size();
        for (int pos = 0; pos < total; pos++) {
            SourceItem sourceItem = testExtractTitle.list_item.get(pos);
            String html = BaseUtil.readFile(sourceItem.getFilename(), sourceItem.getCharset());
            logger.debug("anchor" + pos + " =" + sourceItem.getAnchor());

            Article parsed = testExtractTitle.parser.parse(html, sourceItem.getUrl(), 0L, sourceItem.getAnchor());
            if (parsed == null) {
                logger.warn("parse or extarct error, item =" + pos + ";src = " + sourceItem.getFilename());
                continue;
            }

            ArticleUtil.printArticleInfo(pos, parsed);
            if (testExtractTitle.compareArticle(sourceItem.getArticle(), parsed, pos, sourceItem)) {
                logger.debug("equal,pos=" + pos);
            } else {
                logger.warn("NOT equal,pos=" + pos);
                mismatchCount++;
            }
        }
        logger.warn("Error Count=" + mismatchCount);
    }

    /**
     * Registers the fixtures: one {@link SourceItem} per saved HTML page, each
     * carrying the {@link Article} the parser is expected to produce.
     *
     * <p>The integer passed alongside each text value is presumably the expected
     * position of that field (it is what {@code get*Pos()} is compared against in
     * {@link #compareArticle}) - confirm against {@link Article}.
     * NOTE(review): page type 2 appears to mark list/index pages, for which only
     * the page type is compared - confirm against {@link Article}.
     */
    public void constructResult() {
        SourceItem chinaCrb = new SourceItem("src/test/resources/html_title/china-crb1.html", "GBK", "http://www.china-crb.cn/HTML/2007/zhili/200915028.html");
        Article chinaCrbExpected = new Article();
        chinaCrbExpected.setTitle("杨惠妍：承担财富的重量与质量", 110);
        chinaCrbExpected.setSiteCat("首页>治理", 98);
        chinaCrbExpected.setSource("", -1);
        chinaCrbExpected.setTime("2009-12-14 00:00", 118);
        chinaCrbExpected.setPageType(1);
        chinaCrbExpected.setContentPos(121);
        register(chinaCrb, chinaCrbExpected);

        SourceItem infomorning = new SourceItem("src/test/resources/html_title/infomorning1.html", "GBK", "http://www.infomorning.com/articleview.asp?id=3486");
        Article infomorningExpected = new Article();
        infomorningExpected.setTitle("证监会撤销立立电子IPO核准 中国证券市场第一例", 71);
        infomorningExpected.setSiteCat("信息早报网络版首页>财经新闻", 60);
        infomorningExpected.setSource("中国新闻网", 78);
        infomorningExpected.setTime("2009-04-03 00:00", 79);
        infomorningExpected.setPageType(1);
        infomorningExpected.setContentPos(85);
        register(infomorning, infomorningExpected);

        SourceItem cnnb = new SourceItem("src/test/resources/html_title/cnnb1.html", "GBK", "http://stock.cnnb.com.cn/content/channel/tglj/c157/2009/1230/448466998.shtml");
        Article cnnbExpected = new Article();
        cnnbExpected.setTitle("大连友谊表示嘉威德投资不参与友谊集团股权挂牌竞买", 252);
        cnnbExpected.setTime("2009-12-30 08:22", 254);
        cnnbExpected.setPageType(1);
        cnnbExpected.setContentPos(259);
        register(cnnb, cnnbExpected);

        SourceItem jgstock = new SourceItem("src/test/resources/html_title/jgstock1.html", "GBK", "http://www.jgstock.com/guojicaijing/095141782B0H3F6G2D7EE81H.html");
        Article jgstockExpected = new Article();
        jgstockExpected.setTitle("伯南克：“压力测试”有助夯实公众信心", 75);
        jgstockExpected.setSiteCat("首页>金股证券资讯>国际财经>正文", 64);
        jgstockExpected.setTime("2009-05-14 17:53", 83);
        jgstockExpected.setPageType(1);
        jgstockExpected.setContentPos(84);
        register(jgstock, jgstockExpected);

        SourceItem jrqhwList = new SourceItem("src/test/resources/html_title/list-jrqhw1.html", "GBK", "http://jrqhw.com/InstantNews/hyxw/List_220.html");
        Article jrqhwListExpected = new Article();
        jrqhwListExpected.setPageType(2);
        register(jrqhwList, jrqhwListExpected);

        SourceItem cfiList = new SourceItem("src/test/resources/html_title/list-cfi1.html", "utf-8", "http://industry.cfi.cn/BCA0A1A1653A1655,DGS_A0A1A1653A1654_my3.html");
        Article cfiListExpected = new Article();
        cfiListExpected.setPageType(2);
        register(cfiList, cfiListExpected);

        // The anchor text below intentionally keeps its trailing space.
        SourceItem taihainet = new SourceItem("src/test/resources/html_title/taihainet1.html", "GBK", "http://888.taihainet.com/1455/100112/7099223,00.php", "“广发概念”还有多少个涨停？ ");
        Article taihainetExpected = new Article();
        taihainetExpected.setTitle("“广发概念”还有多少个涨停？", 50);
        taihainetExpected.setSiteCat("台海网>证券频道>股市聚焦", 39);
        taihainetExpected.setTime("2010-01-12 07:56", 52);
        taihainetExpected.setPageType(1);
        taihainetExpected.setContentPos(54);
        register(taihainet, taihainetExpected);

        SourceItem yikuo = new SourceItem("src/test/resources/html_title/yikuo1.html", "GBK", "http://goo.yikuo.com/news/2010-01-11/1077_2010111162656107716110433.html", "股指期货助推银行股价值回归");
        Article yikuoExpected = new Article();
        yikuoExpected.setTitle("股指期货助推银行股价值回归(01/11)", 249);
        yikuoExpected.setTime("2010-1-11 16:21", 259);
        yikuoExpected.setPageType(1);
        yikuoExpected.setContentPos(291);
        register(yikuo, yikuoExpected);

        SourceItem jrj = new SourceItem("src/test/resources/html_title/jrj1.html", "GBK", "http://finance.jrj.com.cn/2010/01/1902256827037.shtml", "航空航天等战略新兴产业总体部署将出台");
        Article jrjExpected = new Article();
        jrjExpected.setTitle("航空航天等战略新兴产业总体部署将出台", 89);
        jrjExpected.setSiteCat("金融界首页>财经频道>国内财经>正文", 54);
        jrjExpected.setSource("证券日报", 93);
        jrjExpected.setTime("2010-01-19 02:25", 91);
        jrjExpected.setPageType(1);
        jrjExpected.setContentPos(99);
        register(jrj, jrjExpected);

        SourceItem netease = new SourceItem("src/test/resources/html_title/163-1.html", "GBK", "http://money.163.com/09/1103/04/5N5SR75D002534NV.html", "");
        Article neteaseExpected = new Article();
        neteaseExpected.setTitle("南车集团获土耳其3.5亿元轻轨订单", 76);
        neteaseExpected.setSiteCat("网易>财经频道>正文", 70);
        neteaseExpected.setSource("第一财经日报", 78);
        neteaseExpected.setTime("2009-11-03 04:12", 77);
        neteaseExpected.setPageType(1);
        neteaseExpected.setContentPos(83);
        register(netease, neteaseExpected);
    }

    /** Attaches the expected article to the fixture and appends it to the fixture list. */
    private void register(SourceItem item, Article expected) {
        item.setArticle(expected);
        this.list_item.add(item);
    }

    /**
     * Compares an expected article with a parsed one and logs every difference.
     *
     * <p>When the expected page type is 2, only the page type is compared.
     * Otherwise title, site category, source and time (value and position),
     * content position and page type are all checked, and each mismatch is
     * logged separately. Text values are compared null-safely, so a field that
     * is {@code null} on one side only is reported as a mismatch rather than
     * raising an exception.
     *
     * @param article    the expected article
     * @param article2   the article produced by the parser
     * @param i          index of the fixture, used only in log messages
     * @param sourceItem the fixture, used only for its URL in log messages
     * @return {@code true} when no difference was found
     */
    public boolean compareArticle(Article article, Article article2, int i, SourceItem sourceItem) {
        if (article.getPageType() == 2) {
            if (article2.getPageType() != 2) {
                warnMismatch("PageType", i, sourceItem);
                return false;
            }
            return true;
        }

        boolean matches = true;
        if (article.getTitlePos() != article2.getTitlePos() || !sameValue(article.getTitle(), article2.getTitle())) {
            warnMismatch("title", i, sourceItem);
            matches = false;
        }
        if (article.getSiteCatPos() != article2.getSiteCatPos() || !sameValue(article.getSiteCat(), article2.getSiteCat())) {
            warnMismatch("SiteCat", i, sourceItem);
            matches = false;
        }
        if (article.getSourcePos() != article2.getSourcePos() || !sameValue(article.getSource(), article2.getSource())) {
            warnMismatch("Source", i, sourceItem);
            matches = false;
        }
        if (article.getTimePos() != article2.getTimePos() || !sameValue(article.getTime(), article2.getTime())) {
            warnMismatch("Time", i, sourceItem);
            matches = false;
        }
        if (article.getContentPos() != article2.getContentPos()) {
            warnMismatch("Content", i, sourceItem);
            matches = false;
        }
        if (article.getPageType() != article2.getPageType()) {
            warnMismatch("PageType", i, sourceItem);
            matches = false;
        }
        return matches;
    }

    /** Null-safe equality: two nulls are equal, a null never equals a non-null value. */
    private static boolean sameValue(Object expected, Object actual) {
        return expected == null ? actual == null : expected.equals(actual);
    }

    /** Logs a "&lt;field&gt; not equal" warning for the fixture at the given index. */
    private static void warnMismatch(String field, int i, SourceItem sourceItem) {
        logger.warn(" " + field + " not equal, pos = " + i + ";url =" + sourceItem.getUrl());
    }
}
