Skip to content
This repository was archived by the owner on Oct 29, 2023. It is now read-only.

Commit f477feb

Browse files
committed
Add test for number detection
1 parent 96527b6 commit f477feb

File tree

1 file changed

+21
-22
lines changed

1 file changed

+21
-22
lines changed

src/test/java/org/elasticsearch/plugin/ingest/langdetect/LangDetectProcessorTests.java

Lines changed: 21 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717

1818
package org.elasticsearch.plugin.ingest.langdetect;
1919

20+
import com.cybozu.labs.langdetect.LangDetectException;
2021
import com.cybozu.labs.langdetect.SecureDetectorFactory;
2122
import org.elasticsearch.common.settings.Settings;
2223
import org.elasticsearch.env.Environment;
@@ -30,6 +31,7 @@
3031
import java.util.Map;
3132

3233
import static org.hamcrest.Matchers.hasEntry;
34+
import static org.hamcrest.Matchers.is;
3335
import static org.hamcrest.Matchers.not;
3436

3537
public class LangDetectProcessorTests extends ESTestCase {
@@ -42,23 +44,15 @@ public static void loadProfiles() throws Exception {
4244
}
4345

4446
public void testThatProcessorWorks() throws Exception {
45-
Map<String, Object> config = new HashMap<>();
46-
config.put("field", "source_field");
47-
config.put("target_field", "language");
48-
config.put("ignore_missing", false);
49-
50-
Map<String, Object> data = ingestDocument(config,
47+
Map<String, Object> data = ingestDocument(config("source_field", "language", false),
5148
"source_field", "This is hopefully an english text, that will be detected.");
5249

5350
assertThat(data, hasEntry("language", "en"));
5451
}
5552

5653
public void testMaxLengthConfiguration() throws Exception {
57-
Map<String, Object> config = new HashMap<>();
58-
config.put("field", "source_field");
59-
config.put("target_field", "language");
54+
Map<String, Object> config = config("source_field", "language", false);
6055
config.put("max_length", "20b");
61-
config.put("ignore_missing", false);
6256

6357
// a document with a lot of german text at the end, that should be ignored due to max length
6458
// copied from https://de.wikipedia.org/wiki/Unwetter_in_Mitteleuropa_2016
@@ -75,27 +69,24 @@ public void testMaxLengthConfiguration() throws Exception {
7569
}
7670

7771
public void testIgnoreMissingConfiguration() throws Exception {
78-
Map<String, Object> config = new HashMap<>();
79-
config.put("field", "missing_source_field");
80-
config.put("target_field", "language");
81-
config.put("ignore_missing", true);
82-
83-
Map<String, Object> data = ingestDocument(config,
72+
Map<String, Object> data = ingestDocument(config("missing_source_field", "language", true),
8473
"source_field", "This is hopefully an english text, that will be detected.");
8574

8675
assertThat(data, not(hasEntry("language", "en")));
8776
}
8877

8978
public void testEmptyString() throws Exception {
90-
Map<String, Object> config = new HashMap<>();
91-
config.put("field", "source_field");
92-
config.put("target_field", "language");
93-
config.put("ignore_missing", randomBoolean());
94-
95-
Map<String, Object> data = ingestDocument(config,"source_field", "");
79+
Map<String, Object> data = ingestDocument(config("source_field", "language", randomBoolean()),"source_field", "");
9680

9781
assertThat(data, not(hasEntry("language", "en")));
82+
}
83+
84+
public void testNumbersOnlyThrowsException() throws Exception {
85+
Map<String, Object> config = config("source_field", "language", false);
86+
LangDetectException e = expectThrows(LangDetectException.class,
87+
() -> ingestDocument(config, "source_field", "124 56456 546 3432"));
9888

89+
assertThat(e.getMessage(), is("no features in text"));
9990
}
10091

10192
private Map<String, Object> ingestDocument(Map<String, Object> config, String field, String value) throws Exception {
@@ -106,4 +97,12 @@ private Map<String, Object> ingestDocument(Map<String, Object> config, String fi
10697
LangDetectProcessor processor = new LangDetectProcessor.Factory().create(Collections.emptyMap(), randomAlphaOfLength(10), config);
10798
return processor.execute(ingestDocument).getSourceAndMetadata();
10899
}
100+
101+
private Map<String, Object> config(String sourceField, String targetField, boolean ignoreMissing) {
102+
final Map<String, Object> config = new HashMap<>();
103+
config.put("field", sourceField);
104+
config.put("target_field", targetField);
105+
config.put("ignore_missing", ignoreMissing);
106+
return config;
107+
}
109108
}

0 commit comments

Comments
 (0)