1717
1818package org .elasticsearch .plugin .ingest .langdetect ;
1919
20+ import com .cybozu .labs .langdetect .LangDetectException ;
2021import com .cybozu .labs .langdetect .SecureDetectorFactory ;
2122import org .elasticsearch .common .settings .Settings ;
2223import org .elasticsearch .env .Environment ;
3031import java .util .Map ;
3132
3233import static org .hamcrest .Matchers .hasEntry ;
34+ import static org .hamcrest .Matchers .is ;
3335import static org .hamcrest .Matchers .not ;
3436
3537public class LangDetectProcessorTests extends ESTestCase {
@@ -42,23 +44,15 @@ public static void loadProfiles() throws Exception {
4244 }
4345
4446 public void testThatProcessorWorks () throws Exception {
45- Map <String , Object > config = new HashMap <>();
46- config .put ("field" , "source_field" );
47- config .put ("target_field" , "language" );
48- config .put ("ignore_missing" , false );
49-
50- Map <String , Object > data = ingestDocument (config ,
47+ Map <String , Object > data = ingestDocument (config ("source_field" , "language" , false ),
5148 "source_field" , "This is hopefully an english text, that will be detected." );
5249
5350 assertThat (data , hasEntry ("language" , "en" ));
5451 }
5552
5653 public void testMaxLengthConfiguration () throws Exception {
57- Map <String , Object > config = new HashMap <>();
58- config .put ("field" , "source_field" );
59- config .put ("target_field" , "language" );
54+ Map <String , Object > config = config ("source_field" , "language" , false );
6055 config .put ("max_length" , "20b" );
61- config .put ("ignore_missing" , false );
6256
6357 // a document with a lot of german text at the end, that should be ignored due to max length
6458 // copied from https://de.wikipedia.org/wiki/Unwetter_in_Mitteleuropa_2016
@@ -75,27 +69,24 @@ public void testMaxLengthConfiguration() throws Exception {
7569 }
7670
7771 public void testIgnoreMissingConfiguration () throws Exception {
78- Map <String , Object > config = new HashMap <>();
79- config .put ("field" , "missing_source_field" );
80- config .put ("target_field" , "language" );
81- config .put ("ignore_missing" , true );
82-
83- Map <String , Object > data = ingestDocument (config ,
72+ Map <String , Object > data = ingestDocument (config ("missing_source_field" , "language" , true ),
8473 "source_field" , "This is hopefully an english text, that will be detected." );
8574
8675 assertThat (data , not (hasEntry ("language" , "en" )));
8776 }
8877
8978 public void testEmptyString () throws Exception {
90- Map <String , Object > config = new HashMap <>();
91- config .put ("field" , "source_field" );
92- config .put ("target_field" , "language" );
93- config .put ("ignore_missing" , randomBoolean ());
94-
95- Map <String , Object > data = ingestDocument (config ,"source_field" , "" );
79+ Map <String , Object > data = ingestDocument (config ("source_field" , "language" , randomBoolean ()),"source_field" , "" );
9680
9781 assertThat (data , not (hasEntry ("language" , "en" )));
82+ }
83+
84+ public void testNumbersOnlyThrowsException () throws Exception {
85+ Map <String , Object > config = config ("source_field" , "language" , false );
86+ LangDetectException e = expectThrows (LangDetectException .class ,
87+ () -> ingestDocument (config , "source_field" , "124 56456 546 3432" ));
9888
89+ assertThat (e .getMessage (), is ("no features in text" ));
9990 }
10091
10192 private Map <String , Object > ingestDocument (Map <String , Object > config , String field , String value ) throws Exception {
@@ -106,4 +97,12 @@ private Map<String, Object> ingestDocument(Map<String, Object> config, String fi
10697 LangDetectProcessor processor = new LangDetectProcessor .Factory ().create (Collections .emptyMap (), randomAlphaOfLength (10 ), config );
10798 return processor .execute (ingestDocument ).getSourceAndMetadata ();
10899 }
100+
101+ private Map <String , Object > config (String sourceField , String targetField , boolean ignoreMissing ) {
102+ final Map <String , Object > config = new HashMap <>();
103+ config .put ("field" , sourceField );
104+ config .put ("target_field" , targetField );
105+ config .put ("ignore_missing" , ignoreMissing );
106+ return config ;
107+ }
109108}
0 commit comments