package ie.moguntia.webcrawler; import ie.moguntia.threads.*; import java.net.*; import java.io.*; import java.util.Vector; public class WSDLCrawlerThread extends ControllableThread { public void process(Object o) { // The objects that we're dealing with here a strings for urls try { URL pageURL = (URL) o; String mimetype = pageURL.openConnection().getContentType(); // Discard all non-text files // Further assumptions on the mime type should not be made, because // some WSDLs advertise themselves as text/plain, others as text/xml // Anyway, we should try to identify WSDL pages by the definitions- // tag rather than by content-type. if (!mimetype.startsWith("text")) return; String rawPage = SaveURL.getURL(pageURL); // I don't know if it is legal, but we also want to identify the // file as WSDL if the definitions-tag is not spelled in small // letters. String smallPage = rawPage.toLowerCase().replaceAll("\\s", " "); if (smallPage.indexOf("