diff --git a/annogen.py b/annogen.py
index 8f39036b66ff8a0fed0c6d5e6816277c22ce8da0..4c44816095390539b977c7ea7843e9679161f15d 100755
--- a/annogen.py
+++ b/annogen.py
@@ -2500,7 +2500,7 @@ def yarowsky_indicators(withAnnot_unistr,canBackground):
         if all(x.end()-x.start()==llen for x in re.finditer(re.escape(mdStart)+("("+re.escape(mdEnd)+"((?!"+re.escape(mdStart)+").)*.?"+re.escape(mdStart)+")?").join(re.escape(c) for c in list(nonAnnot)),corpus_unistr)):
           if nonAnnot==diagnose: diagnose_write("%s is default by majority-case rule after checking for dangerous overlaps etc" % (withAnnot_unistr,))
           yield True ; return
-    run_in_background = canBackground and len(okStarts) > 500 and executor # TODO: is this 500 threshold correct?
+    run_in_background = canBackground and len(okStarts) > 500 and executor # In a test with 300, 500, 700 and 900, the 500 threshold was fastest on concurrent.futures, but by just a few seconds.  TODO: does mpi4py.futures have a different 'sweet spot' here? (low priority unless we can get MPI to outdo concurrent.futures in this application)
     may_take_time = canBackground and len(okStarts) > 1000
     if may_take_time: sys.stderr.write("\nLarge collocation check (%s has %d matches + %s), %s....  \n" % (withAnnot_unistr.encode(terminal_charset,'replace'),len(okStarts),badInfo(badStarts,nonAnnot),cond(run_in_background,"backgrounding","could take some time")))
     if run_in_background: