diff --git a/pom.xml b/pom.xml index 154d560..ed9c59d 100644 --- a/pom.xml +++ b/pom.xml @@ -12,7 +12,7 @@ maven-compiler-plugin 3.8.1 - 20 + 17 diff --git a/src/me/mrletsplay/scraperbot/ScraperTask.java b/src/me/mrletsplay/scraperbot/ScraperTask.java index 44fa88f..bf15dbe 100644 --- a/src/me/mrletsplay/scraperbot/ScraperTask.java +++ b/src/me/mrletsplay/scraperbot/ScraperTask.java @@ -36,7 +36,7 @@ public class ScraperTask { private String cookie, // For CF bypass, login etc. - userAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0"; + userAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0"; private FilterList exclude; private FilterList include; @@ -131,7 +131,7 @@ public class ScraperTask { break; } - System.out.println(toVisit.size() + " " + url); // TODO: show in info command + System.out.println("Visiting " + url + " (" + toVisit.size() + " left)"); // TODO: show in info command toVisit.remove(url); if(!visited.add(url)) { System.out.println("Deja vu, I've been in this place before"); @@ -141,7 +141,7 @@ public class ScraperTask { Set links = getLinks(url); if(Thread.currentThread().isInterrupted()) break; links.removeAll(visited); - links.stream().filter(l -> include.matches(url) && !exclude.matches(url)).forEach(toVisit::add); + links.stream().filter(l -> include.matches(l) && !exclude.matches(l)).forEach(toVisit::add); Set newFound = links.stream().filter(l -> find.matches(l)).collect(Collectors.toSet()); System.out.println("FOUND: " + newFound); // TODO: show in info command @@ -169,7 +169,8 @@ public class ScraperTask { } private static String clean(String uri) throws URISyntaxException { - URI u = URI.create(uri); +// System.out.println(uri); + URI u = URI.create(uri.replace(" ", "%20")); return new URI(u.getScheme(), u.getSchemeSpecificPart(), null).toString(); }