package com.gemantic.parser.tools;

import com.gemantic.parser.impl.NewsParser;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.Utils;
import org.htmlcleaner.XPatherException;
import org.springframework.context.support.ClassPathXmlApplicationContext;

/* loaded from: input_file:com/gemantic/parser/tools/FetchTools.class */
/**
 * Command-line tool that reads {@code url,value} lines from an input file, fetches each URL,
 * evaluates the XPath {@code //div[@id='all-sort']/dl/dd/a} on the parsed page and writes one
 * {@code value,text} line per matching node to an output file.
 */
public class FetchTools {
    private static final Logger logger = Logger.getLogger(FetchTools.class);

    /** Location of the Spring context that defines the "NewsParser" bean. */
    private static final String CONTEXT_LOCATION = "classpath:META-INF/applicationContext_parser.xml";

    /** XPath evaluated against every fetched page. */
    private static final String LINK_XPATH = "//div[@id='all-sort']/dl/dd/a";

    private NewsParser parser;

    /** Creates the tool and tries to load the parser from the Spring context. */
    public FetchTools() {
        init();
    }

    /**
     * @return the parser in use; {@code null} if loading it from the Spring context failed and
     *         none was set afterwards
     */
    public NewsParser getParser() {
        return this.parser;
    }

    /**
     * @param newsParser the parser to use for subsequent fetches
     */
    public void setParser(NewsParser newsParser) {
        this.parser = newsParser;
    }

    /**
     * Looks up the "NewsParser" bean from the Spring context. This is best-effort: any failure
     * is logged (with its stack trace) and leaves {@link #parser} unset instead of propagating.
     */
    private void init() {
        try {
            this.parser = (NewsParser) new ClassPathXmlApplicationContext(CONTEXT_LOCATION).getBean("NewsParser");
        } catch (Throwable th) {
            // Pass the throwable to the logger so the stack trace lands in the log, not on stderr.
            logger.error("cant get spring " + CONTEXT_LOCATION, th);
        }
    }

    /**
     * Entry point.
     *
     * @param strArr {@code strArr[0]} is the input file of {@code url,value} lines and
     *               {@code strArr[1]} is the output file to write
     * @throws IllegalStateException if the parser could not be loaded
     * @throws Exception if reading the input, fetching a URL, parsing or writing the output fails
     */
    public static void main(String[] strArr) throws Exception {
        logger.info("args.length=" + strArr.length);
        if (strArr.length < 2) {
            System.err.println("Usage: java com.gemantic.parser.tools.FetchTools <input_file> <output_file> ");
            System.exit(1);
        }
        FetchTools fetchTools = new FetchTools();
        if (fetchTools.parser == null) {
            // init() swallows its failure; stop here with a clear message before the output
            // file is truncated, rather than hitting a NullPointerException on the first URL.
            throw new IllegalStateException("NewsParser bean could not be loaded from " + CONTEXT_LOCATION);
        }
        logger.info("=========begin ============");
        String inputPath = strArr[0];
        String outputPath = strArr[1];
        List<String> lines = readFileAsList(inputPath);
        logger.info("url size = " + lines.size());

        // FileWriter encodes with the platform default charset.
        FileWriter fileWriter = new FileWriter(outputPath);
        try {
            for (int i = 0; i < lines.size(); i++) {
                String line = lines.get(i);
                // Lines without a comma, or starting with one, carry no URL and are skipped.
                if (line.indexOf(",") <= 0) {
                    continue;
                }
                String[] fields = line.split(",");
                if (fields.length < 2) {
                    // split() drops trailing empty strings, so "url," yields a single field.
                    logger.warn("missing value after comma, skip line:" + line);
                    continue;
                }
                String address = fields[0];
                String value = fields[1];
                URL url = new URL(address);
                logger.info("i=" + i + ";Key=" + address + "; value=" + value);
                TagNode htmlTagNode = fetchTools.parser.getHtmlTagNode(Utils.readUrl(url, "utf-8").toString());
                if (htmlTagNode == null) {
                    logger.warn("cannot convert to tagnode:" + url);
                    continue;
                }
                try {
                    for (Object obj : htmlTagNode.evaluateXPath(LINK_XPATH)) {
                        TagNode tagNode = (TagNode) obj;
                        String record = value + "," + tagNode.getText() + "\n";
                        logger.info(record);
                        fileWriter.write(record);
                    }
                } catch (XPatherException e) {
                    // A failed XPath evaluation only skips this page; keep processing the rest.
                    logger.error("xpath evaluation failed:" + url, e);
                }
            }
        } finally {
            // Always release the file handle and flush buffered output, even on failure.
            fileWriter.close();
        }
        logger.info("write success, file = " + outputPath);
    }

    /**
     * Reads a text file and returns its non-blank lines, each trimmed of surrounding whitespace.
     * The file is decoded with the platform default charset.
     *
     * @param str path of the file to read
     * @return the trimmed, non-blank lines in file order; empty if there are none
     * @throws IOException if the file cannot be opened or read
     */
    public static List<String> readFileAsList(String str) throws IOException {
        List<String> result = new ArrayList<String>();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(str)));
        try {
            String readLine;
            while ((readLine = bufferedReader.readLine()) != null) {
                String trimmed = readLine.trim();
                if (trimmed.length() != 0) {
                    result.add(trimmed);
                }
            }
        } finally {
            bufferedReader.close();
        }
        return result;
    }
}
