package com.gemantic.parser;

import com.gemantic.parser.impl.NewsParser;
import com.gemantic.parser.model.Article;
import com.gemantic.parser.util.ArtificialRulesFilter;
import com.gemantic.parser.util.BaseUtil;
import com.gemantic.parser.util.GDocUtil;
import com.gemantic.parser.util.ParserFilter;
import com.gemantic.parser.util.ParserLogUtil;
import com.gemantic.workflow.doc.GDoc;
import com.gemantic.workflow.operator.Operator;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.log4j.Logger;
import org.springframework.context.support.ClassPathXmlApplicationContext;

/* loaded from: input_file:com/gemantic/parser/NewsOperator.class */
public class NewsOperator extends Operator {
    private NewsParser newsParser;
    private ParserFilter parserFilter;
    private static Logger logger = Logger.getLogger(NewsOperator.class);
    private static final List<String> domains = new ArrayList();
    private static final List<String> domainNames = new ArrayList();

    public NewsOperator() {
        init();
    }

    public GDoc process(GDoc gDoc) {
        logger.info("start new operator for parse");
        if (gDoc.getUrl() == null) {
            logger.info("news operator,gdoc url is null ");
            return gDoc;
        }
        String str = gDoc.get("is_navigation");
        if (str != null && str.equals("true")) {
            ParserLogUtil.logNavPageInfo(gDoc.getUrl(), gDoc.getSegmentName());
            logger.info(" new operator, navigation page return");
            return gDoc;
        }
        Article parseContent = this.newsParser.parseContent(ArtificialRulesFilter.rulesFilter(GDocUtil.convertGdoc2HtmlContent(gDoc)));
        if (parseContent != null) {
            if ((parseContent.getPageType() == 1 || ArtificialRulesFilter.needDetail(gDoc)) && !BaseUtil.emptyStr(parseContent.getTitle()) && !BaseUtil.emptyStr(parseContent.getContent())) {
                GDocUtil.addArticleField2GDoc(parseContent, gDoc);
            }
            try {
                String host = new URL(gDoc.getUrl()).getHost();
                String str2 = null;
                int i = 0;
                while (true) {
                    if (i >= domains.size()) {
                        break;
                    }
                    if (host.endsWith(domains.get(i))) {
                        str2 = domainNames.get(i);
                        break;
                    }
                    i++;
                }
                if (str2 == null) {
                    logger.warn("unknown domain: " + host);
                } else {
                    logger.info("add cs_cite_name to gdoc: hostName(" + host + "), domainName(" + str2 + ")");
                    gDoc.add("cs_site_name", str2);
                }
                ParserLogUtil.logArticleInfo(parseContent, gDoc.getSegmentName(), gDoc.get("navigation_url"), ParserLogUtil.LOGTYPE_NEWS);
            } catch (Exception e) {
                logger.warn("malformed url: " + gDoc.getUrl());
                throw new RuntimeException(e);
            }
        } else {
            ParserLogUtil.logErrUrl(gDoc.getUrl(), gDoc.getSegmentName());
            logger.info("news operator, article is null");
        }
        return gDoc;
    }

    private void init() {
        try {
            ClassPathXmlApplicationContext classPathXmlApplicationContext = new ClassPathXmlApplicationContext("classpath:META-INF/applicationContext_parser.xml");
            this.newsParser = (NewsParser) classPathXmlApplicationContext.getBean("NewsParser");
            this.parserFilter = (ParserFilter) classPathXmlApplicationContext.getBean("parserFilterBean");
        } catch (Throwable th) {
            logger.error("cant get spring classpath:META-INF/applicationContext_parser.xml");
            th.printStackTrace();
            logger.error(th.getMessage());
        }
    }

    public static void main(String[] strArr) throws IOException {
        for (int i = 0; i < domains.size(); i++) {
            System.out.printf("%s --- %s\n", domains.get(i), domainNames.get(i));
        }
    }

    public NewsParser getNewsParser() {
        return this.newsParser;
    }

    public void setNewsParser(NewsParser newsParser) {
        this.newsParser = newsParser;
    }

    public ParserFilter getParserFilter() {
        return this.parserFilter;
    }

    public void setParserFilter(ParserFilter parserFilter) {
        this.parserFilter = parserFilter;
    }

    static {
        Connection connection = null;
        Properties properties = new Properties();
        try {
            try {
                properties.load(NewsOperator.class.getResourceAsStream("/classifier_conf"));
                Class.forName("com.mysql.jdbc.Driver");
                connection = DriverManager.getConnection(properties.getProperty("DB_CONNECTION"), properties.getProperty("DB_USERNAME"), properties.getProperty("DB_PASSWORD"));
                logger.info("load domains and domainNames from database");
                logger.info("connect to: " + properties.getProperty("DB_CONNECTION"));
                logger.info("username: " + properties.getProperty("DB_USERNAME"));
                logger.info("password: " + properties.getProperty("DB_PASSWORD"));
                ResultSet executeQuery = connection.createStatement().executeQuery("SELECT domain, name FROM site_info");
                while (executeQuery.next()) {
                    domains.add(executeQuery.getString(1));
                    domainNames.add(executeQuery.getString(2));
                }
                if (connection != null) {
                    try {
                        connection.close();
                    } catch (Exception e) {
                    }
                }
            } catch (Exception e2) {
                logger.error("load domains and domainNames failed");
                logger.error(e2);
                throw new RuntimeException(e2);
            }
        } catch (Throwable th) {
            if (connection != null) {
                try {
                    connection.close();
                } catch (Exception e3) {
                }
            }
            throw th;
        }
    }
}
