From 1a27b89bd5a3f1b51c447e7f38654aba7759e549 Mon Sep 17 00:00:00 2001 From: Florens Douwes Date: Sat, 9 Aug 2014 13:16:42 +0200 Subject: [PATCH] Move post parsing into a separate class. --- .../floens/chan/core/loader/ChanParser.java | 350 ++++++++++++++++++ .../java/org/floens/chan/core/model/Post.java | 313 +--------------- .../chan/core/net/ChanReaderRequest.java | 2 +- 3 files changed, 355 insertions(+), 310 deletions(-) create mode 100644 Clover/app/src/main/java/org/floens/chan/core/loader/ChanParser.java diff --git a/Clover/app/src/main/java/org/floens/chan/core/loader/ChanParser.java b/Clover/app/src/main/java/org/floens/chan/core/loader/ChanParser.java new file mode 100644 index 00000000..f2b24d41 --- /dev/null +++ b/Clover/app/src/main/java/org/floens/chan/core/loader/ChanParser.java @@ -0,0 +1,350 @@ +/* + * Clover - 4chan browser https://github.com/Floens/Clover/ + * Copyright (C) 2014 Floens + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +package org.floens.chan.core.loader; + + +import android.text.SpannableString; +import android.text.TextUtils; +import android.text.style.AbsoluteSizeSpan; +import android.text.style.ForegroundColorSpan; +import android.text.style.StrikethroughSpan; +import android.text.style.TypefaceSpan; + +import org.floens.chan.ChanApplication; +import org.floens.chan.core.model.Post; +import org.floens.chan.core.model.PostLinkable; +import org.floens.chan.utils.ThemeHelper; +import org.jsoup.Jsoup; +import org.jsoup.helper.StringUtil; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.nodes.TextNode; +import org.jsoup.parser.Parser; +import org.jsoup.select.NodeTraversor; +import org.jsoup.select.NodeVisitor; + +import java.util.List; +import java.util.Set; + +public class ChanParser { + private static ChanParser instance; + + static { + instance = new ChanParser(); + } + + public static ChanParser getInstance() { + return instance; + } + + public ChanParser() { + } + + public void parse(Post post) { + try { + if (!TextUtils.isEmpty(post.name)) { + post.name = Parser.unescapeEntities(post.name, false); + } + + if (!TextUtils.isEmpty(post.subject)) { + post.subject = Parser.unescapeEntities(post.subject, false); + } + } catch (Exception e) { + e.printStackTrace(); + } + + if (post.rawComment != null) { + post.comment = parseComment(post, post.rawComment); + } + } + + private CharSequence parseComment(Post post, String commentRaw) { + CharSequence total = new SpannableString(""); + + try { + String comment = commentRaw.replace("", ""); + + Document document = Jsoup.parseBodyFragment(comment); + + List nodes = document.body().childNodes(); + + for (Node node : nodes) { + CharSequence nodeParsed = parseNode(post, node); + if (nodeParsed != null) { + total = TextUtils.concat(total, nodeParsed); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + + return total; + } + + private CharSequence 
parseNode(Post post, Node node) { + if (node instanceof TextNode) { + String text = ((TextNode) node).text(); + SpannableString spannable = new SpannableString(text); + + detectLinks(post, text, spannable); + + return spannable; + } else { + switch (node.nodeName()) { + case "br": { + return "\n"; + } + case "span": { + Element span = (Element) node; + + SpannableString quote = new SpannableString(span.text()); + + Set classes = span.classNames(); + if (classes.contains("deadlink")) { + quote.setSpan(new ForegroundColorSpan(ThemeHelper.getInstance().getQuoteColor()), 0, quote.length(), 0); + quote.setSpan(new StrikethroughSpan(), 0, quote.length(), 0); + } else { + quote.setSpan(new ForegroundColorSpan(ThemeHelper.getInstance().getInlineQuoteColor()), 0, quote.length(), 0); + detectLinks(post, span.text(), quote); + } + + return quote; + } + case "a": { + CharSequence anchor = parseAnchor(post, (Element) node); + if (anchor != null) { + return anchor; + } else { + return ((Element) node).text(); + } + } + case "s": { + Element spoiler = (Element) node; + + SpannableString link = new SpannableString(spoiler.text()); + + PostLinkable pl = new PostLinkable(post, spoiler.text(), spoiler.text(), PostLinkable.Type.SPOILER); + link.setSpan(pl, 0, link.length(), 0); + post.linkables.add(pl); + + return link; + } + case "pre": { + Element pre = (Element) node; + + Set classes = pre.classNames(); + if (classes.contains("prettyprint")) { + String text = getNodeText(pre); + SpannableString monospace = new SpannableString(text); + monospace.setSpan(new TypefaceSpan("monospace"), 0, monospace.length(), 0); + monospace.setSpan(new AbsoluteSizeSpan(ThemeHelper.getInstance().getCodeTagSize()), 0, monospace.length(), 0); + return monospace; + } else { + return pre.text(); + } + } + default: { + // Unknown tag, add the inner part + if (node instanceof Element) { + return ((Element) node).text(); + } else { + return null; + } + } + } + } + } + + private CharSequence parseAnchor(Post 
post, Element anchor) { + String href = anchor.attr("href"); + Set classes = anchor.classNames(); + + PostLinkable.Type t = null; + String key = null; + Object value = null; + if (classes.contains("quotelink")) { + if (href.contains("/thread/")) { + // link to another thread + PostLinkable.ThreadLink threadLink = null; + + String[] slashSplit = href.split("/"); + if (slashSplit.length == 4) { + String board = slashSplit[1]; + String nums = slashSplit[3]; + String[] numsSplitted = nums.split("#p"); + if (numsSplitted.length == 2) { + try { + int tId = Integer.parseInt(numsSplitted[0]); + int pId = Integer.parseInt(numsSplitted[1]); + threadLink = new PostLinkable.ThreadLink(board, tId, pId); + } catch (NumberFormatException e) { + } + } + } + + if (threadLink != null) { + t = PostLinkable.Type.THREAD; + key = anchor.text() + " \u2192"; // arrow to the right + value = threadLink; + } + } else { + // normal quote + int id = -1; + + String[] splitted = href.split("#p"); + if (splitted.length == 2) { + try { + id = Integer.parseInt(splitted[1]); + } catch (NumberFormatException e) { + } + } + + if (id >= 0) { + t = PostLinkable.Type.QUOTE; + key = anchor.text(); + value = id; + post.repliesTo.add(id); + + // Append OP when its a reply to OP + if (id == post.resto) { + key += " (OP)"; + } + + // Append You when it's a reply to an saved reply + // todo synchronized + if (ChanApplication.getDatabaseManager().isSavedReply(post.board, id)) { + key += " (You)"; + } + } + } + } else { + // normal link + t = PostLinkable.Type.LINK; + key = anchor.text(); + value = href; + } + + if (t != null && key != null && value != null) { + SpannableString link = new SpannableString(key); + PostLinkable pl = new PostLinkable(post, key, value, t); + link.setSpan(pl, 0, link.length(), 0); + post.linkables.add(pl); + + return link; + } else { + return null; + } + } + + private void detectLinks(Post post, String text, SpannableString spannable) { + int startPos = 0; + int endPos; + while (true) 
{ + startPos = text.indexOf("://", startPos); + if (startPos < 0) break; + + // go back to the first space + while (startPos > 0 && !isWhitespace(text.charAt(startPos - 1))) { + startPos--; + } + + // find the last non whitespace character + endPos = startPos; + while (endPos < text.length() - 1 && !isWhitespace(text.charAt(endPos + 1))) { + endPos++; + } + + // one past + endPos++; + + String linkString = text.substring(startPos, endPos); + + PostLinkable pl = new PostLinkable(post, linkString, linkString, PostLinkable.Type.LINK); + spannable.setSpan(pl, startPos, endPos, 0); + post.linkables.add(pl); + + startPos = endPos; + } + } + + private boolean isWhitespace(char c) { + return Character.isWhitespace(c) || c == '>'; // consider > as a link separator + } + + // Below code taken from org.jsoup.nodes.Element.text(), but it preserves <br>
+ private String getNodeText(Element node) { + final StringBuilder accum = new StringBuilder(); + new NodeTraversor(new NodeVisitor() { + public void head(Node node, int depth) { + if (node instanceof TextNode) { + TextNode textNode = (TextNode) node; + appendNormalisedText(accum, textNode); + } else if (node instanceof Element) { + Element element = (Element) node; + if (accum.length() > 0 && + element.isBlock() && + !lastCharIsWhitespace(accum)) + accum.append(" "); + + if (element.tag().getName().equals("br")) { + accum.append("\n"); + } + } + } + + public void tail(Node node, int depth) { + } + }).traverse(node); + return accum.toString().trim(); + } + + private static boolean lastCharIsWhitespace(StringBuilder sb) { + return sb.length() != 0 && sb.charAt(sb.length() - 1) == ' '; + } + + private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { + String text = textNode.getWholeText(); + + if (!preserveWhitespace(textNode.parent())) { + text = normaliseWhitespace(text); + if (lastCharIsWhitespace(accum)) + text = stripLeadingWhitespace(text); + } + accum.append(text); + } + + private static String normaliseWhitespace(String text) { + text = StringUtil.normaliseWhitespace(text); + return text; + } + + private static String stripLeadingWhitespace(String text) { + return text.replaceFirst("^\\s+", ""); + } + + private static boolean preserveWhitespace(Node node) { + // looks only at this element and one level up, to prevent recursion & needless stack searches + if (node != null && node instanceof Element) { + Element element = (Element) node; + return element.tag().preserveWhitespace() || + element.parent() != null && element.parent().tag().preserveWhitespace(); + } + return false; + } +} diff --git a/Clover/app/src/main/java/org/floens/chan/core/model/Post.java b/Clover/app/src/main/java/org/floens/chan/core/model/Post.java index 7eaffa87..e8da32a1 100644 --- a/Clover/app/src/main/java/org/floens/chan/core/model/Post.java +++ 
b/Clover/app/src/main/java/org/floens/chan/core/model/Post.java @@ -18,30 +18,14 @@ package org.floens.chan.core.model; import android.text.SpannableString; -import android.text.TextUtils; -import android.text.style.AbsoluteSizeSpan; -import android.text.style.ForegroundColorSpan; -import android.text.style.StrikethroughSpan; -import android.text.style.TypefaceSpan; -import org.floens.chan.ChanApplication; import org.floens.chan.chan.ChanUrls; -import org.floens.chan.core.model.PostLinkable.Type; +import org.floens.chan.core.loader.ChanParser; import org.floens.chan.ui.view.PostView; -import org.floens.chan.utils.ThemeHelper; -import org.jsoup.Jsoup; -import org.jsoup.helper.StringUtil; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.nodes.Node; -import org.jsoup.nodes.TextNode; import org.jsoup.parser.Parser; -import org.jsoup.select.NodeTraversor; -import org.jsoup.select.NodeVisitor; import java.util.ArrayList; import java.util.List; -import java.util.Set; /** * Contains all data needed to represent a single post. @@ -77,6 +61,8 @@ public class Post { public String title = ""; public int fileSize; public int images = -1; + public String rawComment; + public boolean deleted = false; /** @@ -90,7 +76,6 @@ public class Post { public List repliesFrom = new ArrayList<>(); public final ArrayList linkables = new ArrayList<>(); - public boolean parsedSpans = false; public SpannableString subjectSpan; public SpannableString nameSpan; @@ -102,15 +87,10 @@ public class Post { * The PostView the Post is currently bound to. 
*/ private PostView linkableListener; - private String rawComment; public Post() { } - public void setComment(String e) { - rawComment = e; - } - public void setLinkableListener(PostView listener) { linkableListener = listener; } @@ -149,295 +129,10 @@ public class Post { filename = Parser.unescapeEntities(filename, false); } - if (rawComment != null) { - comment = parseComment(rawComment); - } - - try { - if (!TextUtils.isEmpty(name)) { - name = Parser.unescapeEntities(name, false); - } - - if (!TextUtils.isEmpty(subject)) { - subject = Parser.unescapeEntities(subject, false); - } - } catch (Exception e) { - e.printStackTrace(); - } + ChanParser.getInstance().parse(this); return true; } - private CharSequence parseComment(String commentRaw) { - CharSequence total = new SpannableString(""); - - try { - String comment = commentRaw.replace("", ""); - - Document document = Jsoup.parseBodyFragment(comment); - - List nodes = document.body().childNodes(); - - for (Node node : nodes) { - CharSequence nodeParsed = parseNode(node); - if (nodeParsed != null) { - total = TextUtils.concat(total, nodeParsed); - } - } - } catch (Exception e) { - e.printStackTrace(); - } - - return total; - } - - private CharSequence parseNode(Node node) { - if (node instanceof TextNode) { - String text = ((TextNode) node).text(); - SpannableString spannable = new SpannableString(text); - - detectLinks(text, spannable); - - return spannable; - } else { - switch (node.nodeName()) { - case "br": { - return "\n"; - } - case "span": { - Element span = (Element) node; - - SpannableString quote = new SpannableString(span.text()); - - Set classes = span.classNames(); - if (classes.contains("deadlink")) { - quote.setSpan(new ForegroundColorSpan(ThemeHelper.getInstance().getQuoteColor()), 0, quote.length(), 0); - quote.setSpan(new StrikethroughSpan(), 0, quote.length(), 0); - } else { - quote.setSpan(new ForegroundColorSpan(ThemeHelper.getInstance().getInlineQuoteColor()), 0, quote.length(), 0); - 
detectLinks(span.text(), quote); - } - - return quote; - } - case "a": { - CharSequence anchor = parseAnchor((Element) node); - if (anchor != null) { - return anchor; - } else { - return ((Element) node).text(); - } - } - case "s": { - Element spoiler = (Element) node; - - SpannableString link = new SpannableString(spoiler.text()); - - PostLinkable pl = new PostLinkable(this, spoiler.text(), spoiler.text(), Type.SPOILER); - link.setSpan(pl, 0, link.length(), 0); - linkables.add(pl); - - return link; - } - case "pre": { - Element pre = (Element) node; - - Set classes = pre.classNames(); - if (classes.contains("prettyprint")) { - String text = getNodeText(pre); - SpannableString monospace = new SpannableString(text); - monospace.setSpan(new TypefaceSpan("monospace"), 0, monospace.length(), 0); - monospace.setSpan(new AbsoluteSizeSpan(ThemeHelper.getInstance().getCodeTagSize()), 0, monospace.length(), 0); - return monospace; - } else { - return pre.text(); - } - } - default: { - // Unknown tag, add the inner part - if (node instanceof Element) { - return ((Element) node).text(); - } else { - return null; - } - } - } - } - } - - private CharSequence parseAnchor(Element anchor) { - String href = anchor.attr("href"); - Set classes = anchor.classNames(); - - Type t = null; - String key = null; - Object value = null; - if (classes.contains("quotelink")) { - if (href.contains("/thread/")) { - // link to another thread - PostLinkable.ThreadLink threadLink = null; - - String[] slashSplit = href.split("/"); - if (slashSplit.length == 4) { - String board = slashSplit[1]; - String nums = slashSplit[3]; - String[] numsSplitted = nums.split("#p"); - if (numsSplitted.length == 2) { - try { - int tId = Integer.parseInt(numsSplitted[0]); - int pId = Integer.parseInt(numsSplitted[1]); - threadLink = new PostLinkable.ThreadLink(board, tId, pId); - } catch (NumberFormatException e) { - } - } - } - - if (threadLink != null) { - t = Type.THREAD; - key = anchor.text() + " \u2192"; // arrow 
to the right - value = threadLink; - } - } else { - // normal quote - int id = -1; - - String[] splitted = href.split("#p"); - if (splitted.length == 2) { - try { - id = Integer.parseInt(splitted[1]); - } catch (NumberFormatException e) { - } - } - - if (id >= 0) { - t = Type.QUOTE; - key = anchor.text(); - value = id; - repliesTo.add(id); - - // Append OP when its a reply to OP - if (id == resto) { - key += " (OP)"; - } - // Append You when it's a reply to an saved reply - if (ChanApplication.getDatabaseManager().isSavedReply(board, id)) { - key += " (You)"; - } - } - } - } else { - // normal link - t = Type.LINK; - key = anchor.text(); - value = href; - } - - if (t != null && key != null && value != null) { - SpannableString link = new SpannableString(key); - PostLinkable pl = new PostLinkable(this, key, value, t); - link.setSpan(pl, 0, link.length(), 0); - linkables.add(pl); - - return link; - } else { - return null; - } - } - - private void detectLinks(String text, SpannableString spannable) { - int startPos = 0; - int endPos; - while (true) { - startPos = text.indexOf("://", startPos); - if (startPos < 0) break; - - // go back to the first space - while (startPos > 0 && !isWhitespace(text.charAt(startPos - 1))) { - startPos--; - } - - // find the last non whitespace character - endPos = startPos; - while (endPos < text.length() - 1 && !isWhitespace(text.charAt(endPos + 1))) { - endPos++; - } - - // one past - endPos++; - - String linkString = text.substring(startPos, endPos); - - PostLinkable pl = new PostLinkable(this, linkString, linkString, PostLinkable.Type.LINK); - spannable.setSpan(pl, startPos, endPos, 0); - linkables.add(pl); - - startPos = endPos; - } - } - - private boolean isWhitespace(char c) { - return Character.isWhitespace(c) || c == '>'; // consider > as a link separator - } - - // Below code taken from org.jsoup.nodes.Element.text(), but it preserves
- private String getNodeText(Element node) { - final StringBuilder accum = new StringBuilder(); - new NodeTraversor(new NodeVisitor() { - public void head(Node node, int depth) { - if (node instanceof TextNode) { - TextNode textNode = (TextNode) node; - appendNormalisedText(accum, textNode); - } else if (node instanceof Element) { - Element element = (Element) node; - if (accum.length() > 0 && - element.isBlock() && - !lastCharIsWhitespace(accum)) - accum.append(" "); - - if (element.tag().getName().equals("br")) { - accum.append("\n"); - } - } - } - - public void tail(Node node, int depth) { - } - }).traverse(node); - return accum.toString().trim(); - } - - private static boolean lastCharIsWhitespace(StringBuilder sb) { - return sb.length() != 0 && sb.charAt(sb.length() - 1) == ' '; - } - - private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { - String text = textNode.getWholeText(); - - if (!preserveWhitespace(textNode.parent())) { - text = normaliseWhitespace(text); - if (lastCharIsWhitespace(accum)) - text = stripLeadingWhitespace(text); - } - accum.append(text); - } - - private static String normaliseWhitespace(String text) { - text = StringUtil.normaliseWhitespace(text); - return text; - } - - private static String stripLeadingWhitespace(String text) { - return text.replaceFirst("^\\s+", ""); - } - - private static boolean preserveWhitespace(Node node) { - // looks only at this element and one level up, to prevent recursion & needless stack searches - if (node != null && node instanceof Element) { - Element element = (Element) node; - return element.tag().preserveWhitespace() || - element.parent() != null && element.parent().tag().preserveWhitespace(); - } - return false; - } } diff --git a/Clover/app/src/main/java/org/floens/chan/core/net/ChanReaderRequest.java b/Clover/app/src/main/java/org/floens/chan/core/net/ChanReaderRequest.java index b140e99e..4e8815aa 100644 --- 
a/Clover/app/src/main/java/org/floens/chan/core/net/ChanReaderRequest.java +++ b/Clover/app/src/main/java/org/floens/chan/core/net/ChanReaderRequest.java @@ -318,7 +318,7 @@ public class ChanReaderRequest extends JsonReaderRequest> { post.name = reader.nextString(); break; case "com": - post.setComment(reader.nextString()); + post.rawComment = reader.nextString(); break; case "tim": post.tim = reader.nextString();