Optimize parsing with multithreading

Various other optimizations
multisite
Floens 9 years ago
parent 4fb752f297
commit 257bf13480
  1. 7
      Clover/app/src/main/java/org/floens/chan/chan/ChanParser.java
  2. 26
      Clover/app/src/main/java/org/floens/chan/core/manager/FilterEngine.java
  3. 7
      Clover/app/src/main/java/org/floens/chan/core/model/Filter.java
  4. 11
      Clover/app/src/main/java/org/floens/chan/core/model/Post.java
  5. 274
      Clover/app/src/main/java/org/floens/chan/core/net/ChanReaderRequest.java
  6. 13
      Clover/app/src/main/java/org/floens/chan/core/net/JsonReaderRequest.java
  7. 81
      Clover/app/src/main/java/org/floens/chan/core/net/PostParseCallable.java

@ -92,13 +92,12 @@ public class ChanParser {
e.printStackTrace();
}
if (!post.parsedSpans) {
post.parsedSpans = true;
parseSpans(theme, post);
}
parseSpans(theme, post);
if (post.rawComment != null) {
post.comment = parseComment(theme, post, post.rawComment);
} else {
post.comment = "";
}
}

@ -29,7 +29,9 @@ import org.floens.chan.utils.Logger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@ -67,6 +69,8 @@ public class FilterEngine {
}
}
private final Map<String, Pattern> patternCache = new HashMap<>();
private final DatabaseManager databaseManager;
private final DatabaseFilterManager databaseFilterManager;
@ -133,21 +137,25 @@ public class FilterEngine {
}
if (matchRegex) {
Matcher matcher = null;
Pattern pattern = null;
if (!forceCompile) {
matcher = filter.compiledMatcher;
synchronized (patternCache) {
pattern = patternCache.get(filter.pattern);
}
}
if (matcher == null) {
Pattern compiledPattern = compile(filter.pattern);
if (compiledPattern != null) {
matcher = filter.compiledMatcher = compiledPattern.matcher("");
Logger.d(TAG, "Resulting pattern: " + filter.compiledMatcher);
if (pattern == null) {
pattern = compile(filter.pattern);
if (pattern != null) {
synchronized (patternCache) {
patternCache.put(filter.pattern, pattern);
}
Logger.d(TAG, "Resulting pattern: " + pattern.pattern());
}
}
if (matcher != null) {
matcher.reset(text);
if (pattern != null) {
Matcher matcher = pattern.matcher(text);
try {
return matcher.find();
} catch (IllegalArgumentException e) {

@ -22,8 +22,6 @@ import com.j256.ormlite.table.DatabaseTable;
import org.floens.chan.core.manager.FilterType;
import java.util.regex.Matcher;
@DatabaseTable
public class Filter {
@DatabaseField(generatedId = true)
@ -51,11 +49,6 @@ public class Filter {
@DatabaseField(canBeNull = false)
public int color;
/**
* Cached version of {@link #pattern} compiled by {@link org.floens.chan.core.manager.FilterEngine#compile(String)}.
*/
public Matcher compiledMatcher;
public boolean hasFilter(FilterType filterType) {
return (type & filterType.flag) != 0;
}

@ -110,8 +110,6 @@ public class Post {
public final ArrayList<PostLinkable> linkables = new ArrayList<>();
public boolean parsedSpans = false;
public SpannableString subjectSpan;
public SpannableString nameSpan;
@ -149,16 +147,15 @@ public class Post {
* @return false if this data is invalid
*/
public boolean finish() {
if (board == null)
return false;
if (no < 0 || resto < 0 || date == null || time < 0)
if (board == null || no < 0 || resto < 0 || date == null || time < 0) {
return false;
}
isOP = resto == 0;
if (isOP && (replies < 0 || images < 0))
if (isOP && (replies < 0 || images < 0)) {
return false;
}
if (filename != null && ext != null && imageWidth > 0 && imageHeight > 0 && tim >= 0) {
hasImage = true;

@ -25,29 +25,50 @@ import com.android.volley.Response.Listener;
import org.floens.chan.Chan;
import org.floens.chan.chan.ChanUrls;
import org.floens.chan.core.database.DatabaseManager;
import org.floens.chan.core.database.DatabaseSavedReplyManager;
import org.floens.chan.core.manager.FilterEngine;
import org.floens.chan.core.model.Filter;
import org.floens.chan.core.model.Loadable;
import org.floens.chan.core.model.Post;
import org.floens.chan.utils.Logger;
import org.floens.chan.utils.Time;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanReaderResponse> {
private static final String TAG = "ChanReaderRequest";
private static final boolean LOG_TIMING = false;
private static final int THREAD_COUNT;
private static ExecutorService EXECUTOR;
static {
THREAD_COUNT = Runtime.getRuntime().availableProcessors();
EXECUTOR = Executors.newFixedThreadPool(THREAD_COUNT);
}
private Loadable loadable;
private List<Post> cached;
private Post op;
private FilterEngine filterEngine;
private DatabaseManager databaseManager;
private DatabaseSavedReplyManager databaseSavedReplyManager;
private List<Filter> filters;
private long startLoad;
/**
 * Builds a request for the given url and looks up the collaborators it needs.
 * The constructor is private; instances are handed out by the static {@code newInstance} factory.
 *
 * @param url           address passed straight to the superclass
 * @param listener      success listener passed straight to the superclass
 * @param errorListener error listener passed straight to the superclass
 */
private ChanReaderRequest(String url, Listener<ChanReaderResponse> listener, ErrorListener errorListener) {
    super(url, listener, errorListener);

    this.filterEngine = FilterEngine.getInstance();
    this.databaseManager = Chan.getDatabaseManager();
    // Resolved once here and kept in a field so later code can use it without going through databaseManager again.
    this.databaseSavedReplyManager = this.databaseManager.getDatabaseSavedReplyManager();
}
public static ChanReaderRequest newInstance(Loadable loadable, List<Post> cached, Listener<ChanReaderResponse> listener, ErrorListener errorListener) {
@ -87,6 +108,8 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
}
}
request.startLoad = Time.startTiming();
return request;
}
@ -97,84 +120,172 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
@Override
public ChanReaderResponse readJson(JsonReader reader) throws Exception {
List<Post> list;
if (LOG_TIMING) {
Time.endTiming("Network", startLoad);
}
long load = Time.startTiming();
ProcessingQueue processing = new ProcessingQueue();
Map<Integer, Post> cachedByNo = new HashMap<>();
for (int i = 0; i < cached.size(); i++) {
Post cache = cached.get(i);
cachedByNo.put(cache.no, cache);
}
if (loadable.isThreadMode()) {
list = loadThread(reader);
loadThread(reader, processing, cachedByNo);
} else if (loadable.isCatalogMode()) {
list = loadCatalog(reader);
loadCatalog(reader, processing, cachedByNo);
} else {
throw new IllegalArgumentException("Unknown mode");
}
if (LOG_TIMING) {
Time.endTiming("Load json", load);
}
List<Post> list = parsePosts(processing);
return processPosts(list);
}
private ChanReaderResponse processPosts(List<Post> serverList) throws Exception {
/**
 * Concurrently parses the posts in {@code queue.toParse} on the shared executor and
 * combines them with the posts that were already available in {@code queue.cached}.
 *
 * @param queue posts split into already-cached ones and ones that still need parsing
 * @return the cached posts followed by every parse result that was not {@code null}
 * @throws InterruptedException if the calling thread is interrupted while waiting for the tasks
 * @throws ExecutionException   if one of the parse tasks threw an exception
 */
private List<Post> parsePosts(ProcessingQueue queue) throws InterruptedException, ExecutionException {
    long startedAt = Time.startTiming();

    // Cached posts need no work and go in first.
    List<Post> result = new ArrayList<>(queue.cached);

    List<Callable<Post>> jobs = new ArrayList<>(queue.toParse.size());
    for (Post unparsed : queue.toParse) {
        jobs.add(new PostParseCallable(filterEngine, filters, databaseSavedReplyManager, unparsed));
    }

    // Nothing new to parse: skip the executor (and the timing log) entirely.
    if (jobs.isEmpty()) {
        return result;
    }

    // invokeAll blocks until every job is done and hands the futures back in submission order.
    for (Future<Post> pending : EXECUTOR.invokeAll(jobs)) {
        Post parsed = pending.get();
        // A null result means the task rejected the post; leave it out.
        if (parsed != null) {
            result.add(parsed);
        }
    }

    if (LOG_TIMING) {
        Time.endTiming("Parse posts with " + THREAD_COUNT + " threads", startedAt);
    }

    return result;
}
private ChanReaderResponse processPosts(List<Post> serverPosts) throws Exception {
ChanReaderResponse response = new ChanReaderResponse();
response.posts = new ArrayList<>(serverList.size());
response.posts = new ArrayList<>(serverPosts.size());
response.op = op;
List<Post> cachedPosts = new ArrayList<>();
List<Post> newPosts = new ArrayList<>();
if (cached.size() > 0) {
long deleteCheck = Time.startTiming();
// Add all posts that were parsed before
response.posts.addAll(cached);
cachedPosts.addAll(cached);
Map<Integer, Post> cachedPostsByNo = new HashMap<>();
for (int i = 0; i < cachedPosts.size(); i++) {
Post post = cachedPosts.get(i);
cachedPostsByNo.put(post.no, post);
}
Map<Integer, Post> serverPostsByNo = new HashMap<>();
for (int i = 0; i < serverPosts.size(); i++) {
Post post = serverPosts.get(i);
serverPostsByNo.put(post.no, post);
}
// If there's a cached post but it's not in the list received from the server, mark it as deleted
if (loadable.isThreadMode()) {
boolean serverHas;
for (Post cache : cached) {
serverHas = false;
for (Post b : serverList) {
if (b.no == cache.no) {
serverHas = true;
break;
}
}
cache.deleted.set(!serverHas);
for (int i = 0; i < cachedPosts.size(); i++) {
Post cachedPost = cachedPosts.get(i);
cachedPost.deleted.set(!serverPostsByNo.containsKey(cachedPost.no));
}
}
if (LOG_TIMING) {
Time.endTiming("Delete check", deleteCheck);
}
long newCheck = Time.startTiming();
// If there's a post in the list from the server, that's not in the cached list, add it.
boolean known;
for (Post post : serverList) {
known = false;
for (Post cache : cached) {
if (cache.no == post.no) {
known = true;
break;
}
}
if (!known) {
response.posts.add(post);
for (int i = 0; i < serverPosts.size(); i++) {
Post serverPost = serverPosts.get(i);
if (!cachedPostsByNo.containsKey(serverPost.no)) {
newPosts.add(serverPost);
}
}
if (LOG_TIMING) {
Time.endTiming("New check", newCheck);
}
} else {
response.posts.addAll(serverList);
newPosts.addAll(serverPosts);
}
for (int i = 0; i < response.posts.size(); i++) {
Post sourcePost = response.posts.get(i);
synchronized (sourcePost.repliesFrom) {
sourcePost.repliesFrom.clear();
List<Post> allPosts = new ArrayList<>(cachedPosts.size() + newPosts.size());
allPosts.addAll(cachedPosts);
allPosts.addAll(newPosts);
if (loadable.isThreadMode()) {
Map<Integer, Post> postsByNo = new HashMap<>();
for (int i = 0; i < allPosts.size(); i++) {
Post post = allPosts.get(i);
postsByNo.put(post.no, post);
}
// Maps post no's to a list of no's that that post received replies from
Map<Integer, List<Integer>> replies = new HashMap<>();
for (int j = i + 1; j < response.posts.size(); j++) {
Post replyToSource = response.posts.get(j);
if (replyToSource.repliesTo.contains(sourcePost.no)) {
sourcePost.repliesFrom.add(replyToSource.no);
long collectReplies = Time.startTiming();
for (int i = 0; i < allPosts.size(); i++) {
Post sourcePost = allPosts.get(i);
for (int replyTo : sourcePost.repliesTo) {
List<Integer> value = replies.get(replyTo);
if (value == null) {
value = new ArrayList<>(3);
replies.put(replyTo, value);
}
value.add(sourcePost.no);
}
}
if (LOG_TIMING) {
Time.endTiming("Collect replies", collectReplies);
}
long mapReplies = Time.startTiming();
for (Map.Entry<Integer, List<Integer>> entry : replies.entrySet()) {
int key = entry.getKey();
List<Integer> value = entry.getValue();
Post subject = postsByNo.get(key);
synchronized (subject.repliesFrom) {
subject.repliesFrom.clear();
subject.repliesFrom.addAll(value);
}
}
if (LOG_TIMING) {
Time.endTiming("Map replies", mapReplies);
}
}
response.posts.addAll(allPosts);
return response;
}
private List<Post> loadThread(JsonReader reader) throws Exception {
ArrayList<Post> list = new ArrayList<>();
private void loadThread(JsonReader reader, ProcessingQueue queue, Map<Integer, Post> cachedByNo) throws Exception {
reader.beginObject();
// Page object
while (reader.hasNext()) {
@ -184,10 +295,7 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
// Thread array
while (reader.hasNext()) {
// Thread object
Post post = readPostObject(reader);
if (post != null) {
list.add(post);
}
readPostObject(reader, queue, cachedByNo);
}
reader.endArray();
} else {
@ -195,13 +303,9 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
}
}
reader.endObject();
return list;
}
private List<Post> loadCatalog(JsonReader reader) throws Exception {
ArrayList<Post> list = new ArrayList<>();
private void loadCatalog(JsonReader reader, ProcessingQueue queue, Map<Integer, Post> cachedByNo) throws Exception {
reader.beginArray(); // Array of pages
while (reader.hasNext()) {
@ -212,10 +316,7 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
reader.beginArray(); // Threads array
while (reader.hasNext()) {
Post post = readPostObject(reader);
if (post != null) {
list.add(post);
}
readPostObject(reader, queue, cachedByNo);
}
reader.endArray();
@ -228,11 +329,9 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
}
reader.endArray();
return list;
}
private Post readPostObject(JsonReader reader) throws Exception {
private void readPostObject(JsonReader reader, ProcessingQueue queue, Map<Integer, Post> cachedByNo) throws Exception {
Post post = new Post();
post.board = loadable.board;
@ -242,12 +341,7 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
switch (key) {
case "no":
// Post number
post.no = reader.nextInt();
/*} else if (key.equals("time")) {
// Time
long time = reader.nextLong();
post.date = new Date(time * 1000);*/
break;
case "now":
post.date = reader.nextString();
@ -340,59 +434,23 @@ public class ChanReaderRequest extends JsonReaderRequest<ChanReaderRequest.ChanR
op.uniqueIps = post.uniqueIps;
}
Post cached = null;
for (Post item : this.cached) {
if (item.no == post.no) {
cached = item;
break;
}
}
Post cached = cachedByNo.get(post.no);
if (cached != null) {
return cached;
queue.cached.add(cached);
} else {
// Process the filters before finish, because parsing the html is dependent on filter matches
processPostFilter(post);
if (!post.finish()) {
Logger.e(TAG, "Incorrect data about post received for post " + post.no);
return null;
} else {
processPostAfterFinish(post);
return post;
}
}
}
private void processPostFilter(Post post) {
int filterSize = filters.size();
for (int i = 0; i < filterSize; i++) {
Filter filter = filters.get(i);
if (filterEngine.matches(filter, post)) {
FilterEngine.FilterAction action = FilterEngine.FilterAction.forId(filter.action);
switch (action) {
case COLOR:
post.filterHighlightedColor = filter.color;
break;
case HIDE:
post.filterStub = true;
break;
case REMOVE:
post.filterRemove = true;
break;
}
}
queue.toParse.add(post);
}
}
private void processPostAfterFinish(Post post) {
post.isSavedReply = databaseManager.getDatabaseSavedReplyManager().isSaved(post.board, post.no);
}
/**
 * Plain holder for the outcome of reading one response: the op post and the list of posts.
 * It is the type this request delivers to its listener.
 */
public static class ChanReaderResponse {
// Op Post that is created new each time.
// Used to later copy members like image count to the real op on the main thread.
public Post op;
// The posts produced for this response. Not initialised here; the code building the response assigns the list.
public List<Post> posts;
}
/**
 * Scratch container that sorts posts into two buckets:
 * {@code cached} for posts that need no further work and {@code toParse} for posts that still have to be parsed.
 * Both lists start out empty.
 */
private static class ProcessingQueue {
    public List<Post> cached;
    public List<Post> toParse;

    ProcessingQueue() {
        cached = new ArrayList<>();
        toParse = new ArrayList<>();
    }
}
}

@ -31,9 +31,11 @@ import org.floens.chan.utils.IOUtils;
import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
public abstract class JsonReaderRequest<T> extends Request<T> {
private static final Charset UTF8 = Charset.forName("UTF-8");
protected final Listener<T> listener;
public JsonReaderRequest(String url, Listener<T> listener, ErrorListener errorListener) {
@ -50,19 +52,14 @@ public abstract class JsonReaderRequest<T> extends Request<T> {
@Override
protected Response<T> parseNetworkResponse(NetworkResponse response) {
ByteArrayInputStream baos = new ByteArrayInputStream(response.data);
JsonReader reader = null;
try {
reader = new JsonReader(new InputStreamReader(baos, "UTF-8"));
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
JsonReader reader = new JsonReader(new InputStreamReader(baos, UTF8));
Exception exception = null;
T read = null;
try {
read = readJson(reader);
} catch (Exception e) {
exception = e;
}

@ -0,0 +1,81 @@
/*
* Clover - 4chan browser https://github.com/Floens/Clover/
* Copyright (C) 2014 Floens
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.floens.chan.core.net;
import org.floens.chan.core.database.DatabaseSavedReplyManager;
import org.floens.chan.core.manager.FilterEngine;
import org.floens.chan.core.model.Filter;
import org.floens.chan.core.model.Post;
import org.floens.chan.utils.Logger;
import java.util.List;
import java.util.concurrent.Callable;
/**
 * Task that finishes a single {@link Post}: applies the filters to it, validates it through
 * {@link Post#finish()} and marks whether it is a saved reply.
 * <p>
 * Called concurrently to parse the post html and the filters on it. All collaborators are
 * stored in final fields so a task cannot be re-pointed after it has been created.
 */
class PostParseCallable implements Callable<Post> {
    private static final String TAG = "PostParseCallable";

    private final FilterEngine filterEngine;
    private final List<Filter> filters;
    private final DatabaseSavedReplyManager savedReplyManager;
    private final Post post;

    /**
     * @param filterEngine      engine used to test each filter against the post
     * @param filters           filters to try, in list order
     * @param savedReplyManager used to look up whether the post is a saved reply
     * @param post              the post this task works on; it is modified in place
     */
    public PostParseCallable(FilterEngine filterEngine, List<Filter> filters,
                             DatabaseSavedReplyManager savedReplyManager, Post post) {
        this.filterEngine = filterEngine;
        this.filters = filters;
        this.savedReplyManager = savedReplyManager;
        this.post = post;
    }

    /**
     * Runs the filters, finishes the post and fills in {@code isSavedReply}.
     *
     * @return the same post instance, or {@code null} when {@link Post#finish()} rejects it
     */
    @Override
    public Post call() throws Exception {
        // Process the filters before finish, because parsing the html is dependent on filter matches
        processPostFilter(post);

        if (!post.finish()) {
            Logger.e(TAG, "Incorrect data about post received for post " + post.no);
            return null;
        }

        post.isSavedReply = savedReplyManager.isSaved(post.board, post.no);

        return post;
    }

    /**
     * Applies the action of every matching filter to the post. Filters are visited in list
     * order, so when several filters of the same kind match, the last one decides the value.
     */
    private void processPostFilter(Post post) {
        int filterSize = filters.size();
        for (int i = 0; i < filterSize; i++) {
            Filter filter = filters.get(i);
            if (!filterEngine.matches(filter, post)) {
                continue;
            }

            FilterEngine.FilterAction action = FilterEngine.FilterAction.forId(filter.action);
            if (action == null) {
                // NOTE(review): forId is not visible from here; if it returns null for an
                // unrecognised id, switching on it would throw a NullPointerException and
                // fail the whole task. Skip such a filter instead - confirm against forId.
                Logger.e(TAG, "Unknown filter action " + filter.action);
                continue;
            }

            switch (action) {
                case COLOR:
                    post.filterHighlightedColor = filter.color;
                    break;
                case HIDE:
                    post.filterStub = true;
                    break;
                case REMOVE:
                    post.filterRemove = true;
                    break;
            }
        }
    }
}
Loading…
Cancel
Save