diff --git a/README.md b/README.md
index 235398e..9ac8ba0 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# VEVOS: Ground Truth Extraction v2.0.0
+# VEVOS: Ground Truth Extraction v2.1.0
VEVOS is a tool suite for the simulation of the evolution of clone-and-own projects and consists of two main components:
The ground truth extraction, called VEVOS/Extraction and the variant simulation called VEVOS/Simulation.
diff --git a/build-docker-image.sh b/build-docker-image.sh
index a848be1..48f5b5e 100755
--- a/build-docker-image.sh
+++ b/build-docker-image.sh
@@ -1,3 +1,5 @@
-#! /bin/bash
-docker build --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) -t extraction .
+#!/bin/bash
+# Build the 'extraction' Docker image, passing a user id and group id as build arguments.
+# When invoked through sudo, the ids of the account that called sudo (SUDO_USER) are used
+# instead of those of the current (root) user.
+BUILD_USER="${SUDO_USER:-$(whoami)}"
+# Abort early if the ids cannot be resolved instead of building with empty build arguments.
+USER_ID="$(id -u "$BUILD_USER")" || exit 1
+GROUP_ID="$(id -g "$BUILD_USER")" || exit 1
+docker build --build-arg USER_ID="$USER_ID" --build-arg GROUP_ID="$GROUP_ID" -t extraction .
diff --git a/docker-resources/custom.md b/docker-resources/custom.md
index 88015de..7387795 100644
--- a/docker-resources/custom.md
+++ b/docker-resources/custom.md
@@ -1,3 +1,3 @@
Project name | Domain | Source code available (**y**es/**n**o)? | Is it a git repository (**y**es/**n**o)? | Repository URL | Clone URL | Estimated number of commits
-------------------|-------------------------|-----------------------------------------|------------------------------------------|--------------------------------------------------------------|----------------------------------------------------|-----------------------------
-linux | operating system | y | y | https://github.com/torvalds/linux | https://github.com/DiffDetective/linux.git | 1,072,142
+argouml-spl | modelling tool | y | y | https://github.com/marcusvnac/argouml-spl.git | https://github.com/marcusvnac/argouml-spl.git | 23
diff --git a/docker-resources/custom.properties b/docker-resources/custom.properties
index 28a1ba7..6753d33 100644
--- a/docker-resources/custom.properties
+++ b/docker-resources/custom.properties
@@ -4,9 +4,8 @@
# Define the path to the dataset here
# You can also delete or add lines to the custom.md
diff-detective.dataset-file=custom.md
-######
-# Parameters that probably should not be changed
-######
+# Which files to process, specified as a comma-separated list of file extensions
+extraction.file-extensions=java
# Print the ground truth for each commit. Only activate this for the debugging of small datasets.
extraction.print-enabled=false
# Should changes to only the presence condition of source code be ignored? If set to true, VEVOS will only consider
@@ -23,4 +22,6 @@ extraction.gt-save-dir=ground-truth
# Number of threads to use
diff-detective.num-threads=1
# Number of commits to process in a single batch by one thread
-diff-detective.batch-size=8
\ No newline at end of file
+diff-detective.batch-size=8
+# Use the parser for C preprocessor annotations (i.e., 'cpp') or for JavaPP annotations (i.e., 'jpp')
+diff-detective.parser = jpp
diff --git a/docker-resources/extract.sh b/docker-resources/extract.sh
index 3b1218b..99958cc 100644
--- a/docker-resources/extract.sh
+++ b/docker-resources/extract.sh
@@ -6,41 +6,38 @@ echo "Files in ground-truth folder:"
ls -l /home/user
ls -l ground-truth
-if [ "$1" == 'verification' ]
-then
- echo "Executing variability extraction defined in verification.properties."
- PROPS=verification.properties
-elif [ "$1" == 'custom' ]
-then
- echo "Executing variability extraction defined in verification.properties."
- PROPS=custom.properties
-elif [ "$1" == '--help' ]
-then
- echo "Examples:"
- echo "# Run verification with small datasets"
- echo "./start-extraction.sh verification fast"
- echo "# Run extraction of all Diff Detective datasets apart from the linux kernel"
- echo "./start-extraction.sh fast"
- echo "# Run the extraction with a custom set of properties which have to be defined in docker-resources/custom.properties, BEFORE building the Docker image."
- echo "./start-extraction.sh custom full"
- exit 0
+# Select the properties file from the first argument ('verification', 'custom' or '--help').
+if [ "$1" == 'verification' ]; then
+    echo "Executing variability extraction defined in verification.properties."
+    PROPS=verification.properties
+elif [ "$1" == 'custom' ]; then
+    # Report the file that is actually used by this branch (custom.properties).
+    echo "Executing variability extraction defined in custom.properties."
+    PROPS=custom.properties
+elif [ "$1" == '--help' ]; then
+    echo "Examples:"
+    echo "# Run verification with small datasets"
+    echo "./start-extraction.sh verification fast"
+    echo "# Run extraction of all Diff Detective datasets apart from the linux kernel"
+    echo "./start-extraction.sh fast"
+    echo "# Run the extraction with a custom set of properties which have to be defined in docker-resources/custom.properties, BEFORE building the Docker image."
+    echo "./start-extraction.sh custom full"
+    exit 0
else
- echo "Executing variability extraction defined in without_linux.properties."
- PROPS=without_linux.properties
+ echo "Executing variability extraction defined in without_linux.properties."
+ PROPS=without_linux.properties
fi
JAR=Extraction-jar-with-dependencies.jar
-if [ "$1" == 'fast' ] || [ "$2" == 'fast' ]
-then
- EX_TYPE=org.variantsync.vevos.extraction.FastGroundTruthExtraction
-elif [ "$1" == 'full' ] || [ "$2" == 'full' ]
-then
- EX_TYPE=org.variantsync.vevos.extraction.FullGroundTruthExtraction
+if [ "$1" == 'fast' ] || [ "$2" == 'fast' ]; then
+ EX_TYPE=org.variantsync.vevos.extraction.FastGroundTruthExtraction
+elif [ "$1" == 'full' ] || [ "$2" == 'full' ]; then
+ EX_TYPE=org.variantsync.vevos.extraction.FullGroundTruthExtraction
else
- echo "You either have to select the 'fast' or the 'full' extraction. See --help for more information"
- exit 1
+ # Neither 'fast' nor 'full' was given: report the usage error on stderr and fail.
+ echo "You either have to select the 'fast' or the 'full' extraction. See --help for more information" >&2
+ exit 1
fi
+touch log.txt
java -Xmx128g -jar -Dtinylog.configuration=/home/user/tinylog.properties $JAR $PROPS $EX_TYPE
-#java -jar -Dtinylog.configuration=/home/user/tinylog.properties $JAR $PROPS $EX_TYPE
\ No newline at end of file
+#java -jar -Dtinylog.configuration=/home/user/tinylog.properties $JAR $PROPS $EX_TYPE
+
diff --git a/docker-resources/verification.properties b/docker-resources/verification.properties
index f55ddbb..8bf2fdb 100644
--- a/docker-resources/verification.properties
+++ b/docker-resources/verification.properties
@@ -3,6 +3,8 @@
#####################################
# Path to the dataset file
diff-detective.dataset-file=verification.md
+# Which files to process, specified as a comma-separated list of file extensions
+extraction.file-extensions=c,cpp,h,hpp
# Print the ground truth for each commit. Only activate this for the debugging of small datasets.
extraction.print-enabled=false
# Should changes to only the presence condition of source code be ignored? If set to true, VEVOS will only consider
@@ -19,4 +21,6 @@ extraction.gt-save-dir=ground-truth
# Number of threads to use
diff-detective.num-threads=1
# Number of commits to process in a single batch by one thread
-diff-detective.batch-size=8
\ No newline at end of file
+diff-detective.batch-size=8
+# Use the parser for C preprocessor annotations (i.e., 'cpp') or for JavaPP annotations (i.e., 'jpp')
+diff-detective.parser = cpp
\ No newline at end of file
diff --git a/docker-resources/without_linux.properties b/docker-resources/without_linux.properties
index dd90543..0bf2ad7 100644
--- a/docker-resources/without_linux.properties
+++ b/docker-resources/without_linux.properties
@@ -3,6 +3,8 @@
#####################################
# Path to the dataset file
diff-detective.dataset-file=without_linux.md
+# Which files to process, specified as a comma-separated list of file extensions
+extraction.file-extensions=c,cpp,h,hpp
# Print the ground truth for each commit. Only activate this for the debugging of small datasets.
extraction.print-enabled=false
# Should changes to only the presence condition of source code be ignored? If set to true, VEVOS will only consider
@@ -19,4 +21,6 @@ extraction.gt-save-dir=ground-truth
# Number of threads to use
diff-detective.num-threads=32
# Number of commits to process in a single batch by one thread
-diff-detective.batch-size=8
\ No newline at end of file
+diff-detective.batch-size=8
+# Use the parser for C preprocessor annotations (i.e., 'cpp') or for JavaPP annotations (i.e., 'jpp')
+diff-detective.parser = cpp
\ No newline at end of file
diff --git a/local-maven-repo/deploy-diffdetective.sh b/local-maven-repo/deploy-diffdetective.sh
old mode 100644
new mode 100755
index 51abfcb..9134366
--- a/local-maven-repo/deploy-diffdetective.sh
+++ b/local-maven-repo/deploy-diffdetective.sh
@@ -1,2 +1,2 @@
-mvn deploy:deploy-file -DgroupId=org.variantsync -DartifactId=diffdetective -Dversion=2.0.0 -Durl=file:../local-maven-repo/ -DrepositoryId=local-maven-repo -DupdateReleaseInfo=true -Dfile=../src/main/resources/lib/diffdetective-2.0.0-jar-with-dependencies.jar
+# Deploy the bundled DiffDetective jar into the local Maven repository.
+# The version is defined once so that the artifact version and the jar file name cannot drift apart.
+DIFFDETECTIVE_VERSION=2.2.0
+mvn deploy:deploy-file -DgroupId=org.variantsync -DartifactId=diffdetective -Dversion="${DIFFDETECTIVE_VERSION}" -Durl=file:../local-maven-repo/ -DrepositoryId=local-maven-repo -DupdateReleaseInfo=true -Dfile="../src/main/resources/lib/diffdetective-${DIFFDETECTIVE_VERSION}-jar-with-dependencies.jar"
rm -rf ~/.m2/repository/org/variantsync/
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.md5 b/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.md5
deleted file mode 100644
index 14f74b2..0000000
--- a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.md5
+++ /dev/null
@@ -1 +0,0 @@
-ee1c7a92c5821ec3ef274c90a84f8007
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.sha1 b/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.sha1
deleted file mode 100644
index 89375a3..0000000
--- a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-8b4d455ba73b491c6ee9fdaa4611118c01ce8854
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.md5 b/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.md5
deleted file mode 100644
index cd5bc49..0000000
--- a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.md5
+++ /dev/null
@@ -1 +0,0 @@
-c576e1263735be67975b272339c96074
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.sha1 b/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.sha1
deleted file mode 100644
index 7fb0162..0000000
--- a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.sha1
+++ /dev/null
@@ -1 +0,0 @@
-731b68a0a48f847b4faa30d76be94651c2aafd28
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar
similarity index 96%
rename from local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar
rename to local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar
index 109dcaf..36344ec 100644
Binary files a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar and b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar differ
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.md5 b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.md5
new file mode 100644
index 0000000..885b7eb
--- /dev/null
+++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.md5
@@ -0,0 +1 @@
+0b245d60c6129d43673bc34a5236b88c
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.sha1 b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.sha1
new file mode 100644
index 0000000..712afc4
--- /dev/null
+++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.sha1
@@ -0,0 +1 @@
+36274fe1dba1cce46df2ace6991825681c36fb54
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom
similarity index 93%
rename from local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom
rename to local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom
index 1d2dde4..d5b6a8f 100644
--- a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom
+++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom
@@ -4,5 +4,5 @@
4.0.0
org.variantsync
diffdetective
- 2.0.0
+ 2.2.0
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.md5 b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.md5
new file mode 100644
index 0000000..e350cf0
--- /dev/null
+++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.md5
@@ -0,0 +1 @@
+cc9e2bde8437bcf67cbf4bd889a906ca
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.sha1 b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.sha1
new file mode 100644
index 0000000..198428c
--- /dev/null
+++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.sha1
@@ -0,0 +1 @@
+289b68a0459bc9f42fcefae1a13d5f9d3cbfbffa
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml
index b35b9d3..401d67c 100644
--- a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml
+++ b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml
@@ -3,11 +3,10 @@
org.variantsync
diffdetective
- 2.0.0
+ 2.2.0
- 1.0.0
- 2.0.0
+ 2.2.0
- 20231102083916
+ 20240223170318
diff --git a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.md5 b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.md5
index 9310443..4f66912 100644
--- a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.md5
+++ b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.md5
@@ -1 +1 @@
-ee328247eea6708871ef37008de824e8
\ No newline at end of file
+429d3bbaabca0f4b185822875d4970b5
\ No newline at end of file
diff --git a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.sha1 b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.sha1
index a6faf09..5eca86e 100644
--- a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.sha1
+++ b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.sha1
@@ -1 +1 @@
-668bf5175e6c376a160f74b91ee06fae0c99b6d9
\ No newline at end of file
+70470517c24ba88d3ebe718b9eee0f5ca8389d0c
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 18dfacb..e43637d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -8,7 +8,7 @@
org.variantsync.vevos.extraction
Extraction
- 2.0.0
+ 2.1.0
@@ -81,7 +81,7 @@
org.eclipse.jgit
org.eclipse.jgit
- 5.12.0.202106070339-r
+ 6.8.0.202311291450-r
org.apache.commons
@@ -91,7 +91,7 @@
net.lingala.zip4j
zip4j
- 2.11.3
+ 2.11.5
org.tinylog
@@ -106,7 +106,7 @@
org.variantsync
diffdetective
- 2.0.0
+ 2.2.0
-
\ No newline at end of file
+
diff --git a/src/main/java/org/variantsync/vevos/extraction/ConfigProperties.java b/src/main/java/org/variantsync/vevos/extraction/ConfigProperties.java
index d386040..907f203 100644
--- a/src/main/java/org/variantsync/vevos/extraction/ConfigProperties.java
+++ b/src/main/java/org/variantsync/vevos/extraction/ConfigProperties.java
@@ -1,22 +1,15 @@
package org.variantsync.vevos.extraction;
public class ConfigProperties {
- public static final String PRINT_ENABLED
- = "extraction.print-enabled";
- public static final String GT_SAVE_DIR
- = "extraction.gt-save-dir";
- public static final String IGNORE_PC_CHANGES
- = "extraction.ignore-pc-changes";
- public static final String DATASET_FILE
- = "diff-detective.dataset-file";
- public static final String DD_OUTPUT_DIR
- = "diff-detective.output-dir";
- public static final String REPO_SAVE_DIR
- = "diff-detective.repo-storage-dir";
- public static final String NUM_THREADS
- = "diff-detective.num-threads";
- public static final String BATCH_SIZE
- = "diff-detective.batch-size";
- public static final String EXTRACT_CODE_MATCHING
- = "extraction.extract-code-matching";
+ // Keys of the entries read from the extraction's .properties files.
+ // Whether the ground truth is printed for each commit.
+ public static final String PRINT_ENABLED = "extraction.print-enabled";
+ // NOTE(review): presumably the directory the ground truth is saved to - confirm against the caller.
+ public static final String GT_SAVE_DIR = "extraction.gt-save-dir";
+ // Whether changes that affect only the presence condition of source code are ignored.
+ public static final String IGNORE_PC_CHANGES = "extraction.ignore-pc-changes";
+ // Path to the dataset file.
+ public static final String DATASET_FILE = "diff-detective.dataset-file";
+ // Passed as a path to AnalysisRunner.Options in GroundTruthExtraction.diffdetectiveOptions.
+ public static final String DD_OUTPUT_DIR = "diff-detective.output-dir";
+ // Passed as a path to AnalysisRunner.Options in GroundTruthExtraction.diffdetectiveOptions.
+ public static final String REPO_SAVE_DIR = "diff-detective.repo-storage-dir";
+ // Number of threads to use.
+ public static final String NUM_THREADS = "diff-detective.num-threads";
+ // Number of commits processed in a single batch by one thread.
+ public static final String BATCH_SIZE = "diff-detective.batch-size";
+ // Annotation parser to use: 'cpp' (C preprocessor annotations) or 'jpp' (JavaPP annotations).
+ public static final String PARSER = "diff-detective.parser";
+ // NOTE(review): usage is not visible in this change - confirm the meaning against the extraction code.
+ public static final String EXTRACT_CODE_MATCHING = "extraction.extract-code-matching";
+ // Comma-separated list of file extensions of the files to process.
+ public static final String FILE_EXTENSIONS = "extraction.file-extensions";
}
diff --git a/src/main/java/org/variantsync/vevos/extraction/GroundTruthExtraction.java b/src/main/java/org/variantsync/vevos/extraction/GroundTruthExtraction.java
index 5a94cf3..5b2c8dd 100644
--- a/src/main/java/org/variantsync/vevos/extraction/GroundTruthExtraction.java
+++ b/src/main/java/org/variantsync/vevos/extraction/GroundTruthExtraction.java
@@ -5,6 +5,8 @@
import org.variantsync.diffdetective.datasets.PatchDiffParseOptions;
import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.git.DiffFilter;
+import org.variantsync.diffdetective.feature.AnnotationParser;
+import org.variantsync.diffdetective.feature.PreprocessorAnnotationParser;
import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParseOptions;
import org.variantsync.vevos.extraction.gt.GroundTruth;
@@ -123,6 +125,22 @@ public static Properties getProperties(File propertiesFile) {
* @return The options instance
*/
public static AnalysisRunner.Options diffdetectiveOptions(Properties properties) {
+ // Resolve the file extensions to analyze; fall back to the C/C++ defaults if the property is absent.
+ final String[] allowedFileExtensions;
+ final String extensionsProperty = properties.getProperty(FILE_EXTENSIONS);
+ if (extensionsProperty == null) {
+     allowedFileExtensions = new String[]{"h", "hpp", "c", "cpp"};
+ } else {
+     // Split on commas and drop the whitespace around them. The separator has to use \s
+     // (whitespace): with \w (word characters) the pattern consumes the extensions themselves,
+     // so "c,cpp,h,hpp" would be split into an empty array.
+     allowedFileExtensions = extensionsProperty.trim().split("\\s*,\\s*");
+ }
+
+ // Resolve the annotation parser. A missing property is reported with a descriptive
+ // IllegalArgumentException instead of a NullPointerException raised by the switch.
+ final String parserName = properties.getProperty(PARSER);
+ if (parserName == null) {
+     throw new IllegalArgumentException("The property '" + PARSER
+             + "' is not set. Choose between 'jpp' and 'cpp'");
+ }
+ final AnnotationParser parser = switch (parserName.trim()) {
+     case "jpp" -> PreprocessorAnnotationParser.JPPAnnotationParser;
+     case "cpp" -> PreprocessorAnnotationParser.CPPAnnotationParser;
+     default -> throw new IllegalArgumentException("The parser " + parserName +
+             " is not supported. Choose between 'jpp' and 'cpp'");
+ };
return new AnalysisRunner.Options(Path.of(properties.getProperty(REPO_SAVE_DIR)),
Path.of(properties.getProperty(DD_OUTPUT_DIR)),
@@ -132,10 +150,9 @@ public static AnalysisRunner.Options diffdetectiveOptions(Properties properties)
PatchDiffParseOptions.DiffStoragePolicy.DO_NOT_REMEMBER,
new VariationDiffParseOptions(
repoDefault.variationDiffParseOptions().annotationParser(),
- false, false));
+ false, false)).withAnnotationParser(parser);
}, repo -> new DiffFilter.Builder().allowMerge(true)
- // TODO: make configurable
- .allowedFileExtensions("h", "hpp", "c", "cpp").build(),
+ .allowedFileExtensions(allowedFileExtensions).build(),
true, false);
}
diff --git a/src/main/resources/lib/diffdetective-2.0.0-jar-with-dependencies.jar b/src/main/resources/lib/diffdetective-2.2.0-jar-with-dependencies.jar
similarity index 96%
rename from src/main/resources/lib/diffdetective-2.0.0-jar-with-dependencies.jar
rename to src/main/resources/lib/diffdetective-2.2.0-jar-with-dependencies.jar
index 109dcaf..36344ec 100644
Binary files a/src/main/resources/lib/diffdetective-2.0.0-jar-with-dependencies.jar and b/src/main/resources/lib/diffdetective-2.2.0-jar-with-dependencies.jar differ