This is pretty much straight from the openNLP README
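% echo "The openNLP project is also pretty cool, LGPL and it doesn't cost $3000" | ./textMe.sh
[NP The/DT openNLP/JJ project/NN ] [VP is/VBZ ] [ADVP also/RB ] [ADJP pretty/RB cool/JJ ] ,/, [NP LGPL/NNP ] and/CC [NP it/PRP ] [VP does/VBZ n't/RB cost/VB ] 000/CD
(Note: inside double quotes the shell expands `$3` -- an unset positional parameter -- to nothing, which is why the output ends in 000/CD rather than $3000. Write \$3000 to keep the dollar amount.)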
The script is dumb:
#######################################
# Run java with an explicit classpath (private helper of _textMe_main).
# Arguments: $1 - classpath string, passed to java as ONE argument;
#            remaining arguments are handed to java unchanged
# Outputs:   whatever the invoked java program reads/writes
# Returns:   the exit status of java
#######################################
_textMe_java() {
  local classpath="$1"
  shift
  java -classpath "${classpath}" "$@"
}

#######################################
# Pipe stdin through four OpenNLP command-line tools, in this order:
#   SentenceDetector -> Tokenizer -> PosTagger -> TreebankChunker
# All paths (output/opennlp-tools-1.4.3.jar, lib/, english/...) are relative
# to the current working directory.
# Arguments: none are used
# Outputs:   the last stage (TreebankChunker) writes to stdout
# Returns:   the exit status of the last pipeline stage
#######################################
_textMe_main() {
  # Collect every jar under lib/ as a ':'-separated list. Joining on the
  # newlines that find emits (instead of on spaces) keeps a path that
  # contains a space intact as a single classpath entry.
  local jars
  jars="$(find lib -name '*.jar' | tr '\n' ':')"

  # Use exactly one ':' between entries and none at the end, so the
  # classpath never contains an empty entry.
  # NOTE(review): the JVM is generally understood to treat an empty
  # classpath entry as the current directory -- confirm nothing relied on
  # the working directory being on the classpath.
  local classpath="output/opennlp-tools-1.4.3.jar"
  if [ -n "${jars}" ]; then
    classpath="${classpath}:${jars%:}"
  fi

  _textMe_java "${classpath}" opennlp.tools.lang.english.SentenceDetector \
      english/sentdetect/EnglishSD.bin.gz \
    | _textMe_java "${classpath}" opennlp.tools.lang.english.Tokenizer \
        english/tokenize/EnglishTok.bin.gz \
    | _textMe_java "${classpath}" opennlp.tools.lang.english.PosTagger -d \
        english/parser/tagdict english/parser/tag.bin.gz \
    | _textMe_java "${classpath}" opennlp.tools.lang.english.TreebankChunker \
        english/chunker/EnglishChunk.bin.gz
}
# Forward the script's arguments unchanged; "$@" keeps each argument as one
# word instead of re-splitting and glob-expanding them.
_textMe_main "$@"
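Break it down, one stage at a time (in the commands below, ${jMe} stands for the `java -classpath <classpath>` prefix that the script builds):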
% echo "This is a sentence? I know this is, Mr. Funzone... What will happen?" | ${jMe} opennlp.tools.lang.english.SentenceDetector english/sentdetect/EnglishSD.bin.gz
This is a sentence?
I know this is, Mr. Funzone... What will happen?
% echo "This is a sentence?" | ${jMe} opennlp.tools.lang.english.Tokenizer english/tokenize/EnglishTok.bin.gz
This is a sentence ?
% echo 'This is a sentence ?' | ${jMe} opennlp.tools.lang.english.PosTagger -d english/parser/tagdict english/parser/tag.bin.gz
This/DT is/VBZ a/DT sentence/NN ?/.
% echo 'This/DT is/VBZ a/DT sentence/NN ?/.' | ${jMe} opennlp.tools.lang.english.TreebankChunker english/chunker/EnglishChunk.bin.gz
[NP This/DT ] [VP is/VBZ ] [NP a/DT sentence/NN ] ?/.