git checkout f8cadf001224d751a30125193a084670d8026ed1 # the version I tested
pip install -e .""")# because pip install argos-translate had an error. Note that this command links the install to the current "argos-translate" directory, which should then not be removed.
# Keep (runs of) tags: every maximal run of consecutive <...> tags found in
# txt (each tag optionally followed by whitespace) is entered into e2c mapped
# to itself.  re.DOTALL is passed as in the original; it only affects ".",
# which this pattern does not use.
# TODO: might be better if we don't make them sentence objects
for tag in re.findall(r"(?:<[^>]*>\s*)+", txt, flags=re.DOTALL):
    e2c[tag] = tag
# Replace each tag key in txt with a numbered placeholder " {i} ", where i is
# the key's index in keyList.
# TODO: this loop is slow: might want to get an annogen-generated annotator to
# do it (but there's the \b) or make an OR list like the annogen normaliser.
# But it's nowhere near the worst bottleneck (argostranslate w/out CUDA)
for i, k in enumerate(keyList):
    if k.startswith("<"):
        # Irrespective of word boundaries.  Spacing important:
        # "I went to {1}Paris{2} last summer." upsets the model, as does
        # doing it via Tags, as does using letters not numbers in the {}s
        txt = txt.replace(k, " {%d} " % i)
# Replace every key in txt with a numbered placeholder " {i} ", where i is the
# key's index in keyList.
# TODO: this loop is slow: might want to get an annogen-generated annotator to
# do it (but there's the \b) or make an OR list like the annogen normaliser
for i, k in enumerate(keyList):
    if k.startswith("<"):
        # Tag keys are replaced literally, irrespective of word boundaries.
        txt = txt.replace(k, " {%d} " % i)
    else:
        # Other keys must match at word boundaries.  Don't match lower case
        # if we have upper case (it might be a name or abbreviation that in
        # lower case will be a normal word and not this entry), but do match
        # title case if we are lower case.
        txt = re.sub(
            r"\b" + re.escape(k) + r"\b",
            " {%d} " % i,
            txt,
            flags=0 if re.search("[A-Z]", k) else re.IGNORECASE,
        )
import argostranslate.translate
# TODO: might now want FAHClient --send-pause because xlator averages 2.5 cores and can reach 3.5 (on a 4-core CPU)