From e1e5ad88ee2b5d5c65f09277375d442f707dd258 Mon Sep 17 00:00:00 2001 From: Floens Date: Wed, 24 Jul 2019 21:02:49 +0200 Subject: [PATCH] commentparser: handle internal links with whitelist of domain The fullQuote and quote matchers are now still run on the path segment of the given url, if the domain is whitelisted as such. We do this by stripping off the domain from the href if it has one. If it ends up not matching quote patterns we still use the href as a normal link. --- .../floens/chan/core/model/PostLinkable.java | 1 + .../chan/core/site/parser/CommentParser.java | 29 +++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Clover/app/src/main/java/org/floens/chan/core/model/PostLinkable.java b/Clover/app/src/main/java/org/floens/chan/core/model/PostLinkable.java index ea93d1ce..ee3df216 100644 --- a/Clover/app/src/main/java/org/floens/chan/core/model/PostLinkable.java +++ b/Clover/app/src/main/java/org/floens/chan/core/model/PostLinkable.java @@ -31,6 +31,7 @@ import org.floens.chan.ui.theme.Theme; * PostCell has a {@link PostCell.PostViewMovementMethod}, that searches spans at the location the TextView was tapped, * and handled if it was a PostLinkable. */ +@SuppressWarnings("JavadocReference") public class PostLinkable extends ClickableSpan { public enum Type { QUOTE, LINK, SPOILER, THREAD diff --git a/Clover/app/src/main/java/org/floens/chan/core/site/parser/CommentParser.java b/Clover/app/src/main/java/org/floens/chan/core/site/parser/CommentParser.java index 25e0d35a..70b3cab4 100644 --- a/Clover/app/src/main/java/org/floens/chan/core/site/parser/CommentParser.java +++ b/Clover/app/src/main/java/org/floens/chan/core/site/parser/CommentParser.java @@ -53,6 +53,7 @@ public class CommentParser { private Pattern colorPattern = Pattern.compile("color:#([0-9a-fA-F]+)"); private Map> rules = new HashMap<>(); + private List internalDomains = new ArrayList<>(0); public CommentParser() { // Required tags. 
@@ -101,6 +102,10 @@ public class CommentParser {
         this.fullQuotePattern = fullQuotePattern;
     }
 
+    public void addInternalDomain(String domain) {
+        this.internalDomains.add(domain);
+    }
+
     public CharSequence handleTag(PostParser.Callback callback,
                                   Theme theme,
                                   Post.Builder post,
@@ -233,11 +238,29 @@ public class CommentParser {
 
     public Link matchAnchor(Post.Builder post, CharSequence text, Element anchor, PostParser.Callback callback) {
         String href = anchor.attr("href");
+        // For inner links we handle it as relative (for sites that have multiple domains).
+        String path = "";
+        if (href.startsWith("//") || href.startsWith("http://") || href.startsWith("https://")) {
+            int offset = href.startsWith("//") ? 2 : (href.startsWith("http://") ? 7 : 8);
+
+            // An href without a path (e.g. "https://example.com") has no '/' after the host, so
+            // indexOf returns -1. The host then runs to the end of the href and path stays empty,
+            // instead of substring() being handed a negative end index.
+            int pathStart = href.indexOf('/', offset);
+            String domain = href.substring(offset, pathStart >= 0 ? pathStart : href.length());
+            // Whitelisting domains is optional.
+            // If you don't specify it it will purely use the quote patterns to match.
+            if (pathStart >= 0 && (internalDomains.isEmpty() || internalDomains.contains(domain))) {
+                path = href.substring(pathStart);
+            }
+        } else {
+            path = href;
+        }
 
         PostLinkable.Type t;
         Object value;
 
-        Matcher externalMatcher = fullQuotePattern.matcher(href);
+        Matcher externalMatcher = fullQuotePattern.matcher(path);
         if (externalMatcher.matches()) {
             String board = externalMatcher.group(1);
             int threadId = Integer.parseInt(externalMatcher.group(2));
@@ -251,12 +274,12 @@ public class CommentParser {
                 value = new PostLinkable.ThreadLink(board, threadId, postId);
             }
         } else {
-            Matcher quoteMatcher = quotePattern.matcher(href);
+            Matcher quoteMatcher = quotePattern.matcher(path);
             if (quoteMatcher.matches()) {
                 t = PostLinkable.Type.QUOTE;
                 value = Integer.parseInt(quoteMatcher.group(1));
             } else {
-                // normal link
+                // normal link, use original href
                 t = PostLinkable.Type.LINK;
                 value = href;
             }