Optimize parsing with multithreading

Various other optimizations
multisite
Floens 9 years ago
parent 4fb752f297
commit 257bf13480
  1. 5
      Clover/app/src/main/java/org/floens/chan/chan/ChanParser.java
  2. 26
      Clover/app/src/main/java/org/floens/chan/core/manager/FilterEngine.java
  3. 7
      Clover/app/src/main/java/org/floens/chan/core/model/Filter.java
  4. 11
      Clover/app/src/main/java/org/floens/chan/core/model/Post.java
  5. 266
      Clover/app/src/main/java/org/floens/chan/core/net/ChanReaderRequest.java
  6. 13
      Clover/app/src/main/java/org/floens/chan/core/net/JsonReaderRequest.java
  7. 81
      Clover/app/src/main/java/org/floens/chan/core/net/PostParseCallable.java

@ -92,13 +92,12 @@ public class ChanParser {
e.printStackTrace(); e.printStackTrace();
} }
if (!post.parsedSpans) {
post.parsedSpans = true;
parseSpans(theme, post); parseSpans(theme, post);
}
if (post.rawComment != null) { if (post.rawComment != null) {
post.comment = parseComment(theme, post, post.rawComment); post.comment = parseComment(theme, post, post.rawComment);
} else {
post.comment = "";
} }
} }

@ -29,7 +29,9 @@ import org.floens.chan.utils.Logger;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
@ -67,6 +69,8 @@ public class FilterEngine {
} }
} }
private final Map<String, Pattern> patternCache = new HashMap<>();
private final DatabaseManager databaseManager; private final DatabaseManager databaseManager;
private final DatabaseFilterManager databaseFilterManager; private final DatabaseFilterManager databaseFilterManager;
@ -133,21 +137,25 @@ public class FilterEngine {
} }
if (matchRegex) { if (matchRegex) {
Matcher matcher = null; Pattern pattern = null;
if (!forceCompile) { if (!forceCompile) {
matcher = filter.compiledMatcher; synchronized (patternCache) {
pattern = patternCache.get(filter.pattern);
}
} }
if (matcher == null) { if (pattern == null) {
Pattern compiledPattern = compile(filter.pattern); pattern = compile(filter.pattern);
if (compiledPattern != null) { if (pattern != null) {
matcher = filter.compiledMatcher = compiledPattern.matcher(""); synchronized (patternCache) {
Logger.d(TAG, "Resulting pattern: " + filter.compiledMatcher); patternCache.put(filter.pattern, pattern);
}
Logger.d(TAG, "Resulting pattern: " + pattern.pattern());
} }
} }
if (matcher != null) { if (pattern != null) {
matcher.reset(text); Matcher matcher = pattern.matcher(text);
try { try {
return matcher.find(); return matcher.find();
} catch (IllegalArgumentException e) { } catch (IllegalArgumentException e) {

@ -22,8 +22,6 @@ import com.j256.ormlite.table.DatabaseTable;
import org.floens.chan.core.manager.FilterType; import org.floens.chan.core.manager.FilterType;
import java.util.regex.Matcher;
@DatabaseTable @DatabaseTable
public class Filter { public class Filter {
@DatabaseField(generatedId = true) @DatabaseField(generatedId = true)
@ -51,11 +49,6 @@ public class Filter {
@DatabaseField(canBeNull = false) @DatabaseField(canBeNull = false)
public int color; public int color;
/**
* Cached version of {@link #pattern} compiled by {@link org.floens.chan.core.manager.FilterEngine#compile(String)}.
*/
public Matcher compiledMatcher;
public boolean hasFilter(FilterType filterType) { public boolean hasFilter(FilterType filterType) {
return (type & filterType.flag) != 0; return (type & filterType.flag) != 0;
} }

@ -110,8 +110,6 @@ public class Post {
public final ArrayList<PostLinkable> linkables = new ArrayList<>(); public final ArrayList<PostLinkable> linkables = new ArrayList<>();
public boolean parsedSpans = false;
public SpannableString subjectSpan; public SpannableString subjectSpan;
public SpannableString nameSpan; public SpannableString nameSpan;
@ -149,16 +147,15 @@ public class Post {
* @return false if this data is invalid * @return false if this data is invalid
*/ */
public boolean finish() { public boolean finish() {
if (board == null) if (board == null || no < 0 || resto < 0 || date == null || time < 0) {
return false;
if (no < 0 || resto < 0 || date == null || time < 0)
return false; return false;
}
isOP = resto == 0; isOP = resto == 0;
if (isOP && (replies < 0 || images < 0)) if (isOP && (replies < 0 || images < 0)) {
return false; return false;
}
if (filename != null && ext != null && imageWidth > 0 && imageHeight > 0 && tim >= 0) { if (filename != null && ext != null && imageWidth > 0 && imageHeight > 0 && tim >= 0) {
hasImage = true; hasImage = true;

@ -25,29 +25,50 @@ import com.android.volley.Response.Listener;
import org.floens.chan.Chan; import org.floens.chan.Chan;
import org.floens.chan.chan.ChanUrls; import org.floens.chan.chan.ChanUrls;
import org.floens.chan.core.database.DatabaseManager; import org.floens.chan.core.database.DatabaseManager;
import org.floens.chan.core.database.DatabaseSavedReplyManager;
import org.floens.chan.core.manager.FilterEngine; import org.floens.chan.core.manager.FilterEngine;
import org.floens.chan.core.model.Filter; import org.floens.chan.core.model.Filter;
import org.floens.chan.core.model.Loadable; import org.floens.chan.core.model.Loadable;
import org.floens.chan.core.model.Post; import org.floens.chan.core.model.Post;
import org.floens.chan.utils.Logger; import org.floens.chan.utils.Time;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanReaderResponse> { public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanReaderResponse> {
private static final String TAG = "ChanReaderRequest"; private static final String TAG = "ChanReaderRequest";
private static final boolean LOG_TIMING = false;
private static final int THREAD_COUNT;
private static ExecutorService EXECUTOR;
static {
THREAD_COUNT = Runtime.getRuntime().availableProcessors();
EXECUTOR = Executors.newFixedThreadPool(THREAD_COUNT);
}
private Loadable loadable; private Loadable loadable;
private List<Post> cached; private List<Post> cached;
private Post op; private Post op;
private FilterEngine filterEngine; private FilterEngine filterEngine;
private DatabaseManager databaseManager; private DatabaseManager databaseManager;
private DatabaseSavedReplyManager databaseSavedReplyManager;
private List<Filter> filters; private List<Filter> filters;
private long startLoad;
private ChanReaderRequest(String url, Listener<ChanReaderResponse> listener, ErrorListener errorListener) { private ChanReaderRequest(String url, Listener<ChanReaderResponse> listener, ErrorListener errorListener) {
super(url, listener, errorListener); super(url, listener, errorListener);
filterEngine = FilterEngine.getInstance(); filterEngine = FilterEngine.getInstance();
databaseManager = Chan.getDatabaseManager(); databaseManager = Chan.getDatabaseManager();
databaseSavedReplyManager = databaseManager.getDatabaseSavedReplyManager();
} }
public static ChanReaderRequest newInstance(Loadable loadable, List<Post> cached, Listener<ChanReaderResponse> listener, ErrorListener errorListener) { public static ChanReaderRequest newInstance(Loadable loadable, List<Post> cached, Listener<ChanReaderResponse> listener, ErrorListener errorListener) {
@ -87,6 +108,8 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
} }
} }
request.startLoad = Time.startTiming();
return request; return request;
} }
@ -97,84 +120,172 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
@Override @Override
public ChanReaderResponse readJson(JsonReader reader) throws Exception { public ChanReaderResponse readJson(JsonReader reader) throws Exception {
List<Post> list; if (LOG_TIMING) {
Time.endTiming("Network", startLoad);
}
long load = Time.startTiming();
ProcessingQueue processing = new ProcessingQueue();
Map<Integer, Post> cachedByNo = new HashMap<>();
for (int i = 0; i < cached.size(); i++) {
Post cache = cached.get(i);
cachedByNo.put(cache.no, cache);
}
if (loadable.isThreadMode()) { if (loadable.isThreadMode()) {
list = loadThread(reader); loadThread(reader, processing, cachedByNo);
} else if (loadable.isCatalogMode()) { } else if (loadable.isCatalogMode()) {
list = loadCatalog(reader); loadCatalog(reader, processing, cachedByNo);
} else { } else {
throw new IllegalArgumentException("Unknown mode"); throw new IllegalArgumentException("Unknown mode");
} }
if (LOG_TIMING) {
Time.endTiming("Load json", load);
}
List<Post> list = parsePosts(processing);
return processPosts(list); return processPosts(list);
} }
private ChanReaderResponse processPosts(List<Post> serverList) throws Exception { // Concurrently parses the new posts with an executor
/**
 * Builds the list of posts for this load by combining the already-known posts
 * with the result of parsing every queued new post on the shared executor.
 *
 * @param queue the posts read from the json, split into cached and to-be-parsed
 * @return the cached posts followed by every newly parsed post whose callable
 *         returned a non-null result, in the order they were queued
 * @throws InterruptedException if the calling thread is interrupted while waiting for the executor
 * @throws ExecutionException   if one of the parse tasks threw
 */
private List<Post> parsePosts(ProcessingQueue queue) throws InterruptedException, ExecutionException {
    final long startedAt = Time.startTiming();

    // Posts that were parsed on a previous load are taken over as-is.
    List<Post> result = new ArrayList<>(queue.cached);

    // One parse job per post that has not been seen before.
    List<Callable<Post>> jobs = new ArrayList<>(queue.toParse.size());
    for (Post unparsed : queue.toParse) {
        jobs.add(new PostParseCallable(filterEngine, filters, databaseSavedReplyManager, unparsed));
    }

    if (jobs.isEmpty()) {
        return result;
    }

    // invokeAll waits for all jobs and returns the futures in submission order.
    for (Future<Post> pending : EXECUTOR.invokeAll(jobs)) {
        Post parsed = pending.get();
        // A null result means the callable rejected the post; leave it out.
        if (parsed != null) {
            result.add(parsed);
        }
    }

    if (LOG_TIMING) {
        Time.endTiming("Parse posts with " + THREAD_COUNT + " threads", startedAt);
    }

    return result;
}
private ChanReaderResponse processPosts(List<Post> serverPosts) throws Exception {
ChanReaderResponse response = new ChanReaderResponse(); ChanReaderResponse response = new ChanReaderResponse();
response.posts = new ArrayList<>(serverList.size()); response.posts = new ArrayList<>(serverPosts.size());
response.op = op; response.op = op;
List<Post> cachedPosts = new ArrayList<>();
List<Post> newPosts = new ArrayList<>();
if (cached.size() > 0) { if (cached.size() > 0) {
long deleteCheck = Time.startTiming();
// Add all posts that were parsed before // Add all posts that were parsed before
response.posts.addAll(cached); cachedPosts.addAll(cached);
Map<Integer, Post> cachedPostsByNo = new HashMap<>();
for (int i = 0; i < cachedPosts.size(); i++) {
Post post = cachedPosts.get(i);
cachedPostsByNo.put(post.no, post);
}
Map<Integer, Post> serverPostsByNo = new HashMap<>();
for (int i = 0; i < serverPosts.size(); i++) {
Post post = serverPosts.get(i);
serverPostsByNo.put(post.no, post);
}
// If there's a cached post but it's not in the list received from the server, mark it as deleted // If there's a cached post but it's not in the list received from the server, mark it as deleted
if (loadable.isThreadMode()) { if (loadable.isThreadMode()) {
boolean serverHas; for (int i = 0; i < cachedPosts.size(); i++) {
for (Post cache : cached) { Post cachedPost = cachedPosts.get(i);
serverHas = false; cachedPost.deleted.set(!serverPostsByNo.containsKey(cachedPost.no));
for (Post b : serverList) { }
if (b.no == cache.no) {
serverHas = true;
break;
} }
if (LOG_TIMING) {
Time.endTiming("Delete check", deleteCheck);
} }
long newCheck = Time.startTiming();
cache.deleted.set(!serverHas); // If there's a post in the list from the server, that's not in the cached list, add it.
for (int i = 0; i < serverPosts.size(); i++) {
Post serverPost = serverPosts.get(i);
if (!cachedPostsByNo.containsKey(serverPost.no)) {
newPosts.add(serverPost);
}
} }
if (LOG_TIMING) {
Time.endTiming("New check", newCheck);
}
} else {
newPosts.addAll(serverPosts);
} }
// If there's a post in the list from the server, that's not in the cached list, add it. List<Post> allPosts = new ArrayList<>(cachedPosts.size() + newPosts.size());
boolean known; allPosts.addAll(cachedPosts);
for (Post post : serverList) { allPosts.addAll(newPosts);
known = false;
for (Post cache : cached) { if (loadable.isThreadMode()) {
if (cache.no == post.no) { Map<Integer, Post> postsByNo = new HashMap<>();
known = true; for (int i = 0; i < allPosts.size(); i++) {
break; Post post = allPosts.get(i);
} postsByNo.put(post.no, post);
} }
if (!known) { // Maps post no's to a list of no's that that post received replies from
response.posts.add(post); Map<Integer, List<Integer>> replies = new HashMap<>();
long collectReplies = Time.startTiming();
for (int i = 0; i < allPosts.size(); i++) {
Post sourcePost = allPosts.get(i);
for (int replyTo : sourcePost.repliesTo) {
List<Integer> value = replies.get(replyTo);
if (value == null) {
value = new ArrayList<>(3);
replies.put(replyTo, value);
} }
value.add(sourcePost.no);
} }
} else {
response.posts.addAll(serverList);
} }
if (LOG_TIMING) {
Time.endTiming("Collect replies", collectReplies);
}
long mapReplies = Time.startTiming();
for (int i = 0; i < response.posts.size(); i++) { for (Map.Entry<Integer, List<Integer>> entry : replies.entrySet()) {
Post sourcePost = response.posts.get(i); int key = entry.getKey();
synchronized (sourcePost.repliesFrom) { List<Integer> value = entry.getValue();
sourcePost.repliesFrom.clear();
for (int j = i + 1; j < response.posts.size(); j++) { Post subject = postsByNo.get(key);
Post replyToSource = response.posts.get(j); synchronized (subject.repliesFrom) {
if (replyToSource.repliesTo.contains(sourcePost.no)) { subject.repliesFrom.clear();
sourcePost.repliesFrom.add(replyToSource.no); subject.repliesFrom.addAll(value);
} }
} }
if (LOG_TIMING) {
Time.endTiming("Map replies", mapReplies);
} }
} }
response.posts.addAll(allPosts);
return response; return response;
} }
private List<Post> loadThread(JsonReader reader) throws Exception { private void loadThread(JsonReader reader, ProcessingQueue queue, Map<Integer, Post> cachedByNo) throws Exception {
ArrayList<Post> list = new ArrayList<>();
reader.beginObject(); reader.beginObject();
// Page object // Page object
while (reader.hasNext()) { while (reader.hasNext()) {
@ -184,10 +295,7 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
// Thread array // Thread array
while (reader.hasNext()) { while (reader.hasNext()) {
// Thread object // Thread object
Post post = readPostObject(reader); readPostObject(reader, queue, cachedByNo);
if (post != null) {
list.add(post);
}
} }
reader.endArray(); reader.endArray();
} else { } else {
@ -195,13 +303,9 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
} }
} }
reader.endObject(); reader.endObject();
return list;
} }
private List<Post> loadCatalog(JsonReader reader) throws Exception { private void loadCatalog(JsonReader reader, ProcessingQueue queue, Map<Integer, Post> cachedByNo) throws Exception {
ArrayList<Post> list = new ArrayList<>();
reader.beginArray(); // Array of pages reader.beginArray(); // Array of pages
while (reader.hasNext()) { while (reader.hasNext()) {
@ -212,10 +316,7 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
reader.beginArray(); // Threads array reader.beginArray(); // Threads array
while (reader.hasNext()) { while (reader.hasNext()) {
Post post = readPostObject(reader); readPostObject(reader, queue, cachedByNo);
if (post != null) {
list.add(post);
}
} }
reader.endArray(); reader.endArray();
@ -228,11 +329,9 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
} }
reader.endArray(); reader.endArray();
return list;
} }
private Post readPostObject(JsonReader reader) throws Exception { private void readPostObject(JsonReader reader, ProcessingQueue queue, Map<Integer, Post> cachedByNo) throws Exception {
Post post = new Post(); Post post = new Post();
post.board = loadable.board; post.board = loadable.board;
@ -242,12 +341,7 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
switch (key) { switch (key) {
case "no": case "no":
// Post number
post.no = reader.nextInt(); post.no = reader.nextInt();
/*} else if (key.equals("time")) {
// Time
long time = reader.nextLong();
post.date = new Date(time * 1000);*/
break; break;
case "now": case "now":
post.date = reader.nextString(); post.date = reader.nextString();
@ -340,59 +434,23 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
op.uniqueIps = post.uniqueIps; op.uniqueIps = post.uniqueIps;
} }
Post cached = null; Post cached = cachedByNo.get(post.no);
for (Post item : this.cached) {
if (item.no == post.no) {
cached = item;
break;
}
}
if (cached != null) { if (cached != null) {
return cached; queue.cached.add(cached);
} else { } else {
// Process the filters before finish, because parsing the html is dependent on filter matches queue.toParse.add(post);
processPostFilter(post);
if (!post.finish()) {
Logger.e(TAG, "Incorrect data about post received for post " + post.no);
return null;
} else {
processPostAfterFinish(post);
return post;
}
} }
} }
private void processPostFilter(Post post) {
int filterSize = filters.size();
for (int i = 0; i < filterSize; i++) {
Filter filter = filters.get(i);
if (filterEngine.matches(filter, post)) {
FilterEngine.FilterAction action = FilterEngine.FilterAction.forId(filter.action);
switch (action) {
case COLOR:
post.filterHighlightedColor = filter.color;
break;
case HIDE:
post.filterStub = true;
break;
case REMOVE:
post.filterRemove = true;
break;
}
}
}
}
private void processPostAfterFinish(Post post) {
post.isSavedReply = databaseManager.getDatabaseSavedReplyManager().isSaved(post.board, post.no);
}
public static class ChanReaderResponse { public static class ChanReaderResponse {
// Op Post that is created new each time. // Op Post that is created new each time.
// Used to later copy members like image count to the real op on the main thread. // Used to later copy members like image count to the real op on the main thread.
public Post op; public Post op;
public List<Post> posts; public List<Post> posts;
} }
/**
 * Holder for the posts read from one json response, split by whether a post
 * with the same number was already present in the cached list.
 * <p>
 * The lists themselves are mutable and are filled while reading the json; the
 * references are final so a queue always keeps the same two lists.
 */
private static class ProcessingQueue {
    /** Posts that were found in the cache and are reused without parsing again. */
    public final List<Post> cached = new ArrayList<>();

    /** Posts that were not in the cache and still have to be parsed. */
    public final List<Post> toParse = new ArrayList<>();
}
} }

@ -31,9 +31,11 @@ import org.floens.chan.utils.IOUtils;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException; import java.nio.charset.Charset;
public abstract class JsonReaderRequest<T> extends Request<T> { public abstract class JsonReaderRequest<T> extends Request<T> {
private static final Charset UTF8 = Charset.forName("UTF-8");
protected final Listener<T> listener; protected final Listener<T> listener;
public JsonReaderRequest(String url, Listener<T> listener, ErrorListener errorListener) { public JsonReaderRequest(String url, Listener<T> listener, ErrorListener errorListener) {
@ -50,19 +52,14 @@ public abstract class JsonReaderRequest<T> extends Request<T> {
@Override @Override
protected Response<T> parseNetworkResponse(NetworkResponse response) { protected Response<T> parseNetworkResponse(NetworkResponse response) {
ByteArrayInputStream baos = new ByteArrayInputStream(response.data); ByteArrayInputStream baos = new ByteArrayInputStream(response.data);
JsonReader reader = new JsonReader(new InputStreamReader(baos, UTF8));
JsonReader reader = null;
try {
reader = new JsonReader(new InputStreamReader(baos, "UTF-8"));
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
Exception exception = null; Exception exception = null;
T read = null; T read = null;
try { try {
read = readJson(reader); read = readJson(reader);
} catch (Exception e) { } catch (Exception e) {
exception = e; exception = e;
} }

@ -0,0 +1,81 @@
/*
* Clover - 4chan browser https://github.com/Floens/Clover/
* Copyright (C) 2014 Floens
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.floens.chan.core.net;
import org.floens.chan.core.database.DatabaseSavedReplyManager;
import org.floens.chan.core.manager.FilterEngine;
import org.floens.chan.core.model.Filter;
import org.floens.chan.core.model.Post;
import org.floens.chan.utils.Logger;
import java.util.List;
import java.util.concurrent.Callable;
// Called concurrently to parse the post html and the filters on it
/**
 * Task that prepares a single {@link Post}: it applies the filters, lets the
 * post validate and finish itself, and looks up whether it is a saved reply.
 * <p>
 * All state is assigned once in the constructor and never changed afterwards,
 * so the fields are final.
 */
class PostParseCallable implements Callable<Post> {
    private static final String TAG = "PostParseCallable";

    private final FilterEngine filterEngine;
    private final List<Filter> filters;
    private final DatabaseSavedReplyManager savedReplyManager;
    private final Post post;

    /**
     * @param filterEngine      engine used to test each filter against the post
     * @param filters           filters to apply, in order
     * @param savedReplyManager used to look up whether the post is a saved reply
     * @param post              the post to process; it is modified in place
     */
    public PostParseCallable(FilterEngine filterEngine, List<Filter> filters,
                             DatabaseSavedReplyManager savedReplyManager, Post post) {
        this.filterEngine = filterEngine;
        this.filters = filters;
        this.savedReplyManager = savedReplyManager;
        this.post = post;
    }

    /**
     * Processes the post given to the constructor.
     *
     * @return the same post instance, or {@code null} if {@link Post#finish()}
     *         reported the post data as invalid
     */
    @Override
    public Post call() throws Exception {
        // Process the filters before finish, because parsing the html is dependent on filter matches
        processPostFilter(post);

        if (!post.finish()) {
            Logger.e(TAG, "Incorrect data about post received for post " + post.no);
            return null;
        }

        post.isSavedReply = savedReplyManager.isSaved(post.board, post.no);

        return post;
    }

    /**
     * Applies the action of every matching filter to the post. The loop does not
     * stop at the first match, so several filters can affect the same post and a
     * later COLOR match overwrites the color set by an earlier one.
     */
    private void processPostFilter(Post post) {
        int filterSize = filters.size();
        for (int i = 0; i < filterSize; i++) {
            Filter filter = filters.get(i);
            if (filterEngine.matches(filter, post)) {
                FilterEngine.FilterAction action = FilterEngine.FilterAction.forId(filter.action);
                switch (action) {
                    case COLOR:
                        post.filterHighlightedColor = filter.color;
                        break;
                    case HIDE:
                        post.filterStub = true;
                        break;
                    case REMOVE:
                        post.filterRemove = true;
                        break;
                }
            }
        }
    }
}
Loading…
Cancel
Save