/*
 * Decompiled with CFR 0.152.
 */
package ISSearch;

import ISSearch.ISCrawlerInterface;
import ISSearch.ISDocument;
import ISSearch.ISDocumentInterface;
import ISSearch.ISParser;
import ISSearch.QueueElement;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.StringTokenizer;
import java.util.Vector;

/**
 * Web crawler that works through a FIFO queue of URLs, parses each fetched
 * page with {@link ISParser} and queues the links found on it, up to a
 * configurable depth and queue size.
 *
 * <p>The depth limit, queue limit, timeout, state and current document are
 * {@code static} and therefore shared by every instance. The state is
 * {@code 100} while crawling (set by {@link #start()}) and {@code 200} when
 * stopped (set by {@link #stop()} and {@link #reset()}).
 *
 * <p>The queue-manipulating methods are not synchronized as a whole; only the
 * state flag and the current document are published safely across threads.
 */
public class ISCrawler
implements ISCrawlerInterface {
    static final String DISALLOW = "Disallow:";
    /** State code set by {@link #start()}; the crawl loop runs only in this state. */
    private static final int STATE_RUNNING = 100;
    /** State code set by {@link #stop()} and {@link #reset()}. */
    private static final int STATE_STOPPED = 200;
    static int MAX_CRAWL_DEPTH = 3;
    static int MAX_QUEUE_SIZE = 500;
    /** Connect timeout in milliseconds applied to page and robots.txt requests. */
    static int TIMEOUT = 2000;
    // volatile: written by the controlling thread, read by the crawler thread (and vice versa).
    static volatile ISDocumentInterface CURRENT_DOCUMENT = null;
    /** Depth of the URLs currently being dequeued; also the level stamped on newly queued links. */
    int crawl_level;
    static volatile int crawler_state = STATE_STOPPED;
    /** Pending URLs, in the order they will be crawled. */
    Vector<QueueElement> vectorToSearch = new Vector<QueueElement>();
    /** URLs already handed out by {@link #getNextURL()}, oldest first. */
    Vector<URL> vectorSearched = new Vector<URL>();

    /**
     * Queues a link at the current crawl level unless it is null, was already
     * handed out for crawling, or would exceed the maximum crawl depth.
     *
     * @param link the URL to queue; {@code null} is ignored
     */
    public void addLink(URL link) {
        if (link == null || this.crawl_level + 1 > MAX_CRAWL_DEPTH || this.isVisited(link)) {
            return;
        }
        QueueElement qelem = new QueueElement();
        qelem.url = link;
        qelem.crawl_level = this.crawl_level;
        this.vectorToSearch.addElement(qelem);
    }

    /**
     * Returns the next URL to crawl; delegates to {@link #getNextURL()}.
     *
     * @return the dequeued URL, or {@code null} if none is available
     */
    public URL getBest() {
        return this.getNextURL();
    }

    /** @return the maximum crawl depth */
    public int getCrawlingDepth() {
        return MAX_CRAWL_DEPTH;
    }

    /** @return the most recently parsed document, or {@code null} if none has been set */
    public ISDocumentInterface getCurrentDocument() {
        return CURRENT_DOCUMENT;
    }

    /** @param isd the document to publish as the current one */
    public void setCurrentDocument(ISDocumentInterface isd) {
        CURRENT_DOCUMENT = isd;
    }

    /**
     * Returns the URL most recently handed out by {@link #getNextURL()}.
     *
     * @return the last dequeued URL
     * @throws java.util.NoSuchElementException if no URL has been dequeued yet
     */
    public URL getCurrentURL() {
        return this.vectorSearched.lastElement();
    }

    /** @return the maximum number of pending URLs the crawl loop will queue */
    public int getMaxQueueSize() {
        return MAX_QUEUE_SIZE;
    }

    /** @return the number of URLs currently waiting to be crawled */
    public int getQueueSize() {
        return this.vectorToSearch.size();
    }

    /** @return the current state code ({@code 100} running, {@code 200} stopped) */
    public int getState() {
        return crawler_state;
    }

    /** @param state_code the new state code; the crawl loop only runs while it is {@code 100} */
    public void setState(int state_code) {
        crawler_state = state_code;
    }

    /** @return the connect timeout in milliseconds */
    public int getTimeout() {
        return TIMEOUT;
    }

    /** @param t the connect timeout in milliseconds */
    public void setTimeout(int t) {
        TIMEOUT = t;
    }

    /**
     * Tells whether a URL has already been handed out for crawling.
     *
     * <p>Comparison uses {@link URL#equals(Object)}.
     *
     * @param doc the URL to look up
     * @return {@code true} if the URL is in the list of dequeued URLs
     */
    public boolean isVisited(URL doc) {
        return this.vectorSearched.contains(doc);
    }

    /** @param depth the new maximum crawl depth */
    public void setCrawlingDepth(int depth) {
        MAX_CRAWL_DEPTH = depth;
    }

    /** @param m the new maximum queue size */
    public void setQueueMaxSize(int m) {
        MAX_QUEUE_SIZE = m;
    }

    /** Switches to the running state and starts the crawl loop on a new thread. */
    public void start() {
        Thread crawl = new Thread(this);
        this.setState(STATE_RUNNING);
        crawl.start();
    }

    /** Switches to the stopped state; the crawl loop exits before taking its next URL. */
    public void stop() {
        this.setState(STATE_STOPPED);
    }

    /**
     * Clears the pending and visited lists, returns to depth 0 and switches to
     * the stopped state.
     */
    public void reset() {
        this.vectorSearched.removeAllElements();
        this.vectorToSearch.removeAllElements();
        // Without this a crawl started after reset() would continue at the old
        // depth and queue fewer (or no) links.
        this.crawl_level = 0;
        this.setState(STATE_STOPPED);
    }

    /**
     * Crawl loop: dequeues URLs while the queue is non-empty, the state is
     * running and the depth limit has not been reached; fetches every
     * robots-permitted {@code http} URL and queues the links found on it.
     */
    public void run() {
        // "<" rather than "<=": once crawl_level reaches the limit getNextURL()
        // returns null without dequeuing, so "<=" would spin forever.
        while (!this.isDataStructureEmpty() && this.getState() == STATE_RUNNING && this.crawl_level < MAX_CRAWL_DEPTH) {
            URL url = null;
            if (this.getState() == STATE_RUNNING) {
                url = this.getNextURL();
            }
            // Protocol is checked first so robots.txt is only fetched for URLs we would crawl.
            if (url == null || !"http".equals(url.getProtocol()) || !this.robotSafe(url)) {
                System.out.println("unsupported site");
                continue;
            }
            this.crawlPage(url);
        }
        System.out.println(this.getState());
        if (this.getState() == STATE_STOPPED) {
            System.out.println("Crawler says 'Bye Bye' ;-)");
        }
    }

    /** Fetches one page, parses it if it is HTML or plain text, and queues its links. */
    private void crawlPage(URL url) {
        InputStream urlstream = null;
        try {
            URLConnection urlConnection = url.openConnection();
            urlConnection.setConnectTimeout(TIMEOUT);
            System.out.println("CONNECTION OPENED: " + url.toString());
            String type = this.getContentType(urlConnection);
            System.out.println("CONTENT GOT: " + type);
            // Reuse the connection that produced the headers (so the timeout
            // applies) and take the stream now so that it is always released.
            urlstream = urlConnection.getInputStream();
            boolean textual = type.equals("text/html") || type.equals("text/plain");
            if (!textual || this.getState() != STATE_RUNNING) {
                return;
            }
            ISDocumentInterface isd = this.runParser(new InputStreamReader(urlstream));
            if (isd == null) {
                return;
            }
            isd.setLink(url.toString());
            this.setCurrentDocument(isd);
            if (this.crawl_level < MAX_CRAWL_DEPTH) {
                this.queueLinks(isd);
            }
            System.out.println("(finished)");
        }
        catch (IOException e) {
            System.out.println("FETCH FAILED: " + url + " (" + e + ")");
        }
        catch (RuntimeException e) {
            // A single malformed page must not kill the crawler thread.
            System.out.println("PARSE FAILED: " + url + " (" + e + ")");
        }
        finally {
            if (urlstream != null) {
                try {
                    urlstream.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
    }

    /** Queues the links of a parsed document one level below the current crawl level. */
    private void queueLinks(ISDocumentInterface isd) {
        System.out.println("Adding URLs to queue...");
        URL[] links = isd.getUrls();
        if (links == null) {
            return;
        }
        // addLink() stamps new elements with crawl_level, so raise it while
        // queueing children and always restore it afterwards.
        ++this.crawl_level;
        try {
            for (int i = 0; i < links.length; ++i) {
                if (this.getQueueSize() < MAX_QUEUE_SIZE) {
                    this.addLink(links[i]);
                    System.out.println(links[i]);
                }
            }
        }
        finally {
            --this.crawl_level;
        }
    }

    /**
     * Dequeues the URL at the head of the queue and records it as visited.
     * If the head belongs to a deeper level than the current one, the current
     * level is advanced first.
     *
     * @return the dequeued URL, or {@code null} if the queue is empty or the
     *         depth limit has been reached (the queue is left untouched then)
     */
    public URL getNextURL() {
        if (this.vectorToSearch.isEmpty()) {
            return null;
        }
        QueueElement qelem = this.vectorToSearch.elementAt(0);
        while (true) {
            if (this.crawl_level >= MAX_CRAWL_DEPTH) {
                System.out.println("getNextURL: return null");
                return null;
            }
            if (qelem.crawl_level <= this.crawl_level) {
                break;
            }
            ++this.crawl_level;
        }
        URL temp = qelem.url;
        this.vectorSearched.addElement(temp);
        this.vectorToSearch.removeElementAt(0);
        System.out.println("NEXT URL GIVEN");
        return temp;
    }

    /** @return {@code true} if no URLs are waiting to be crawled */
    public boolean isDataStructureEmpty() {
        return this.vectorToSearch.isEmpty();
    }

    /**
     * Parses a document with a fresh {@link ISParser}.
     *
     * @param r the reader supplying the document text
     * @return whatever {@code ISParser.parse} returns for the reader
     */
    public ISDocumentInterface runParser(Reader r) {
        ISParser p = new ISParser();
        return p.parse(r);
    }

    /**
     * Checks a URL against the {@code Disallow:} entries of
     * {@code http://<host>/robots.txt}. Every {@code Disallow:} entry in the
     * file is honoured, regardless of the user-agent section it appears in.
     *
     * @param url the URL about to be crawled
     * @return {@code false} if the URL's file part starts with a disallowed
     *         path, or if the robots URL cannot be built; {@code true}
     *         otherwise, including when robots.txt is missing, unreadable or
     *         empty
     */
    public boolean robotSafe(URL url) {
        String strHost = url.getHost();
        System.out.println(strHost);
        URL urlRobot;
        try {
            urlRobot = new URL("http://" + strHost + "/robots.txt");
        }
        catch (MalformedURLException e) {
            return false;
        }
        String strCommands;
        try {
            strCommands = this.readRobotsFile(urlRobot);
        }
        catch (IOException e) {
            // No readable robots.txt: nothing is forbidden.
            return true;
        }
        catch (IllegalArgumentException e) {
            return false;
        }
        String strURL = url.getFile();
        int index = 0;
        while ((index = strCommands.indexOf(DISALLOW, index)) != -1) {
            index += DISALLOW.length();
            // The first whitespace-delimited token after "Disallow:" is the forbidden path prefix.
            StringTokenizer st = new StringTokenizer(strCommands.substring(index));
            if (!st.hasMoreTokens()) {
                break;
            }
            if (strURL.startsWith(st.nextToken())) {
                return false;
            }
        }
        return true;
    }

    /** Downloads the robots file as text; an empty file yields an empty string. */
    private String readRobotsFile(URL urlRobot) throws IOException {
        URLConnection connection = urlRobot.openConnection();
        connection.setConnectTimeout(TIMEOUT);
        InputStream urlRobotStream = connection.getInputStream();
        try {
            StringBuilder text = new StringBuilder();
            byte[] b = new byte[1000];
            int numRead;
            while ((numRead = urlRobotStream.read(b)) != -1) {
                text.append(new String(b, 0, numRead));
            }
            return text.toString();
        }
        finally {
            urlRobotStream.close();
        }
    }

    /**
     * Extracts the leading token of the connection's Content-Type header,
     * splitting on whitespace and the characters {@code ,.;:?!}.
     *
     * @param urlConnection the connection to inspect
     * @return the first token of the header, or an empty string when the
     *         header is absent or contains no token
     */
    public String getContentType(URLConnection urlConnection) {
        String header = urlConnection.getContentType();
        if (header == null) {
            return "";
        }
        StringTokenizer s = new StringTokenizer(header, " \t\n\r\f,.;:?!", false);
        return s.hasMoreTokens() ? s.nextToken() : "";
    }

    /** Intentionally empty entry point. */
    public static void main(String[] args) {
    }
}

