diff --git a/README.md b/README.md index 235398e..9ac8ba0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# VEVOS: Ground Truth Extraction v2.0.0 +# VEVOS: Ground Truth Extraction v2.1.0 VEVOS is a tool suite for the simulation of the evolution of clone-and-own projects and consists of two main components: The ground truth extraction, called VEVOS/Extraction and the variant simulation called VEVOS/Simulation. diff --git a/build-docker-image.sh b/build-docker-image.sh index a848be1..48f5b5e 100755 --- a/build-docker-image.sh +++ b/build-docker-image.sh @@ -1,3 +1,5 @@ -#! /bin/bash -docker build --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) -t extraction . +#!/bin/bash +USER_ID=$(id -u ${SUDO_USER:-$(whoami)}) +GROUP_ID=$(id -g ${SUDO_USER:-$(whoami)}) +docker build --build-arg USER_ID=$USER_ID --build-arg GROUP_ID=$GROUP_ID -t extraction . diff --git a/docker-resources/custom.md b/docker-resources/custom.md index 88015de..7387795 100644 --- a/docker-resources/custom.md +++ b/docker-resources/custom.md @@ -1,3 +1,3 @@ Project name | Domain | Source code available (**y**es/**n**o)? | Is it a git repository (**y**es/**n**o)? 
| Repository URL | Clone URL | Estimated number of commits -------------------|-------------------------|-----------------------------------------|------------------------------------------|--------------------------------------------------------------|----------------------------------------------------|----------------------------- -linux | operating system | y | y | https://github.com/torvalds/linux | https://github.com/DiffDetective/linux.git | 1,072,142 +argouml-spl | modelling tool | y | y | https://github.com/marcusvnac/argouml-spl.git | https://github.com/marcusvnac/argouml-spl.git | 23 diff --git a/docker-resources/custom.properties b/docker-resources/custom.properties index 28a1ba7..6753d33 100644 --- a/docker-resources/custom.properties +++ b/docker-resources/custom.properties @@ -4,9 +4,8 @@ # Define the path to the dataset here # You can also delete or add lines to the custom.md diff-detective.dataset-file=custom.md -###### -# Parameters that probably should not be changed -###### +# Which files to process, specified as comma-separated list of file extensions +extraction.file-extensions=java # Print the ground truth for each commit. Only activate this for the debugging of small datasets. extraction.print-enabled=false # Should changes to only the presence condition of source code be ignored? 
If set to true, VEVOS will only consider @@ -23,4 +22,6 @@ extraction.gt-save-dir=ground-truth # Number of threads to use diff-detective.num-threads=1 # Number of commits to process in a single batch by one thread -diff-detective.batch-size=8 \ No newline at end of file +diff-detective.batch-size=8 +# Use the parser for C preprocessor annotations (i.e., 'cpp') or for JavaPP annotations (i.e., 'jpp') +diff-detective.parser = jpp diff --git a/docker-resources/extract.sh b/docker-resources/extract.sh index 3b1218b..99958cc 100644 --- a/docker-resources/extract.sh +++ b/docker-resources/extract.sh @@ -6,41 +6,38 @@ echo "Files in ground-truth folder:" ls -l /home/user ls -l ground-truth -if [ "$1" == 'verification' ] -then - echo "Executing variability extraction defined in verification.properties." - PROPS=verification.properties -elif [ "$1" == 'custom' ] -then - echo "Executing variability extraction defined in verification.properties." - PROPS=custom.properties -elif [ "$1" == '--help' ] -then - echo "Examples:" - echo "# Run verification with small datasets" - echo "./start-extraction.sh verification fast" - echo "# Run extraction of all Diff Detective datasets apart from the linux kernel" - echo "./start-extraction.sh fast" - echo "# Run the extraction with a custom set of properties which have to be defined in docker-resources/custom.properties, BEFORE building the Docker image." - echo "./start-extraction.sh custom full" - exit 0 +if [ "$1" == 'verification' ]; then + echo "Executing variability extraction defined in verification.properties." + PROPS=verification.properties +elif [ "$1" == 'custom' ]; then + echo "Executing variability extraction defined in custom.properties." 
+ PROPS=custom.properties +elif [ "$1" == '--help' ]; then + echo "Examples:" + echo "# Run verification with small datasets" + echo "./start-extraction.sh verification fast" + echo "# Run extraction of all Diff Detective datasets apart from the linux kernel" + echo "./start-extraction.sh fast" + echo "# Run the extraction with a custom set of properties which have to be defined in docker-resources/custom.properties, BEFORE building the Docker image." + echo "./start-extraction.sh custom full" + exit 0 else - echo "Executing variability extraction defined in without_linux.properties." - PROPS=without_linux.properties + echo "Executing variability extraction defined in without_linux.properties." + PROPS=without_linux.properties fi JAR=Extraction-jar-with-dependencies.jar -if [ "$1" == 'fast' ] || [ "$2" == 'fast' ] -then - EX_TYPE=org.variantsync.vevos.extraction.FastGroundTruthExtraction -elif [ "$1" == 'full' ] || [ "$2" == 'full' ] -then - EX_TYPE=org.variantsync.vevos.extraction.FullGroundTruthExtraction +if [ "$1" == 'fast' ] || [ "$2" == 'fast' ]; then + EX_TYPE=org.variantsync.vevos.extraction.FastGroundTruthExtraction +elif [ "$1" == 'full' ] || [ "$2" == 'full' ]; then + EX_TYPE=org.variantsync.vevos.extraction.FullGroundTruthExtraction else - echo "You either have to select the 'fast' or the 'full' extraction. See --help for more information" - exit 1 + echo "You either have to select the 'fast' or the 'full' extraction. 
See --help for more information" + exit 1 fi +touch log.txt java -Xmx128g -jar -Dtinylog.configuration=/home/user/tinylog.properties $JAR $PROPS $EX_TYPE -#java -jar -Dtinylog.configuration=/home/user/tinylog.properties $JAR $PROPS $EX_TYPE \ No newline at end of file +#java -jar -Dtinylog.configuration=/home/user/tinylog.properties $JAR $PROPS $EX_TYPE + diff --git a/docker-resources/verification.properties b/docker-resources/verification.properties index f55ddbb..8bf2fdb 100644 --- a/docker-resources/verification.properties +++ b/docker-resources/verification.properties @@ -3,6 +3,8 @@ ##################################### # Path to the dataset file diff-detective.dataset-file=verification.md +# Which files to process, specified as comma-separated list of file extensions +extraction.file-extensions=c,cpp,h,hpp # Print the ground truth for each commit. Only activate this for the debugging of small datasets. extraction.print-enabled=false # Should changes to only the presence condition of source code be ignored? 
If set to true, VEVOS will only consider @@ -19,4 +21,6 @@ extraction.gt-save-dir=ground-truth # Number of threads to use diff-detective.num-threads=1 # Number of commits to process in a single batch by one thread -diff-detective.batch-size=8 \ No newline at end of file +diff-detective.batch-size=8 +# Use the parser for C preprocessor annotations (i.e., 'cpp') or for JavaPP annotations (i.e., 'jpp') +diff-detective.parser = cpp \ No newline at end of file diff --git a/docker-resources/without_linux.properties b/docker-resources/without_linux.properties index dd90543..0bf2ad7 100644 --- a/docker-resources/without_linux.properties +++ b/docker-resources/without_linux.properties @@ -3,6 +3,8 @@ ##################################### # Path to the dataset file diff-detective.dataset-file=without_linux.md +# Which files to process, specified as comma-separated list of file extensions +extraction.file-extensions=c,cpp,h,hpp # Print the ground truth for each commit. Only activate this for the debugging of small datasets. extraction.print-enabled=false # Should changes to only the presence condition of source code be ignored? 
If set to true, VEVOS will only consider @@ -19,4 +21,6 @@ extraction.gt-save-dir=ground-truth # Number of threads to use diff-detective.num-threads=32 # Number of commits to process in a single batch by one thread -diff-detective.batch-size=8 \ No newline at end of file +diff-detective.batch-size=8 +# Use the parser for C preprocessor annotations (i.e., 'cpp') or for JavaPP annotations (i.e., 'jpp') +diff-detective.parser = cpp \ No newline at end of file diff --git a/local-maven-repo/deploy-diffdetective.sh b/local-maven-repo/deploy-diffdetective.sh old mode 100644 new mode 100755 index 51abfcb..9134366 --- a/local-maven-repo/deploy-diffdetective.sh +++ b/local-maven-repo/deploy-diffdetective.sh @@ -1,2 +1,2 @@ -mvn deploy:deploy-file -DgroupId=org.variantsync -DartifactId=diffdetective -Dversion=2.0.0 -Durl=file:../local-maven-repo/ -DrepositoryId=local-maven-repo -DupdateReleaseInfo=true -Dfile=../src/main/resources/lib/diffdetective-2.0.0-jar-with-dependencies.jar +mvn deploy:deploy-file -DgroupId=org.variantsync -DartifactId=diffdetective -Dversion=2.2.0 -Durl=file:../local-maven-repo/ -DrepositoryId=local-maven-repo -DupdateReleaseInfo=true -Dfile=../src/main/resources/lib/diffdetective-2.2.0-jar-with-dependencies.jar rm -rf ~/.m2/repository/org/variantsync/ \ No newline at end of file diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.md5 b/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.md5 deleted file mode 100644 index 14f74b2..0000000 --- a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.md5 +++ /dev/null @@ -1 +0,0 @@ -ee1c7a92c5821ec3ef274c90a84f8007 \ No newline at end of file diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.sha1 b/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.sha1 deleted file mode 100644 index 89375a3..0000000 --- 
a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8b4d455ba73b491c6ee9fdaa4611118c01ce8854 \ No newline at end of file diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.md5 b/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.md5 deleted file mode 100644 index cd5bc49..0000000 --- a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.md5 +++ /dev/null @@ -1 +0,0 @@ -c576e1263735be67975b272339c96074 \ No newline at end of file diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.sha1 b/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.sha1 deleted file mode 100644 index 7fb0162..0000000 --- a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom.sha1 +++ /dev/null @@ -1 +0,0 @@ -731b68a0a48f847b4faa30d76be94651c2aafd28 \ No newline at end of file diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar similarity index 96% rename from local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar rename to local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar index 109dcaf..36344ec 100644 Binary files a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.jar and b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar differ diff --git a/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.md5 b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.md5 new file mode 100644 index 0000000..885b7eb --- /dev/null +++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.md5 @@ -0,0 +1 @@ +0b245d60c6129d43673bc34a5236b88c \ No newline at end of file diff --git 
a/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.sha1 b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.sha1 new file mode 100644 index 0000000..712afc4 --- /dev/null +++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.jar.sha1 @@ -0,0 +1 @@ +36274fe1dba1cce46df2ace6991825681c36fb54 \ No newline at end of file diff --git a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom similarity index 93% rename from local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom rename to local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom index 1d2dde4..d5b6a8f 100644 --- a/local-maven-repo/org/variantsync/diffdetective/2.0.0/diffdetective-2.0.0.pom +++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom @@ -4,5 +4,5 @@ 4.0.0 org.variantsync diffdetective - 2.0.0 + 2.2.0 diff --git a/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.md5 b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.md5 new file mode 100644 index 0000000..e350cf0 --- /dev/null +++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.md5 @@ -0,0 +1 @@ +cc9e2bde8437bcf67cbf4bd889a906ca \ No newline at end of file diff --git a/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.sha1 b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.sha1 new file mode 100644 index 0000000..198428c --- /dev/null +++ b/local-maven-repo/org/variantsync/diffdetective/2.2.0/diffdetective-2.2.0.pom.sha1 @@ -0,0 +1 @@ +289b68a0459bc9f42fcefae1a13d5f9d3cbfbffa \ No newline at end of file diff --git a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml index 
b35b9d3..401d67c 100644 --- a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml +++ b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml @@ -3,11 +3,10 @@ org.variantsync diffdetective - 2.0.0 + 2.2.0 - 1.0.0 - 2.0.0 + 2.2.0 - 20231102083916 + 20240223170318 diff --git a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.md5 b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.md5 index 9310443..4f66912 100644 --- a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.md5 +++ b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.md5 @@ -1 +1 @@ -ee328247eea6708871ef37008de824e8 \ No newline at end of file +429d3bbaabca0f4b185822875d4970b5 \ No newline at end of file diff --git a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.sha1 b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.sha1 index a6faf09..5eca86e 100644 --- a/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.sha1 +++ b/local-maven-repo/org/variantsync/diffdetective/maven-metadata.xml.sha1 @@ -1 +1 @@ -668bf5175e6c376a160f74b91ee06fae0c99b6d9 \ No newline at end of file +70470517c24ba88d3ebe718b9eee0f5ca8389d0c \ No newline at end of file diff --git a/pom.xml b/pom.xml index 18dfacb..e43637d 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ org.variantsync.vevos.extraction Extraction - 2.0.0 + 2.1.0 @@ -81,7 +81,7 @@ org.eclipse.jgit org.eclipse.jgit - 5.12.0.202106070339-r + 6.8.0.202311291450-r org.apache.commons @@ -91,7 +91,7 @@ net.lingala.zip4j zip4j - 2.11.3 + 2.11.5 org.tinylog @@ -106,7 +106,7 @@ org.variantsync diffdetective - 2.0.0 + 2.2.0 - \ No newline at end of file + diff --git a/src/main/java/org/variantsync/vevos/extraction/ConfigProperties.java b/src/main/java/org/variantsync/vevos/extraction/ConfigProperties.java index d386040..907f203 100644 --- a/src/main/java/org/variantsync/vevos/extraction/ConfigProperties.java +++ 
b/src/main/java/org/variantsync/vevos/extraction/ConfigProperties.java @@ -1,22 +1,15 @@ package org.variantsync.vevos.extraction; public class ConfigProperties { - public static final String PRINT_ENABLED - = "extraction.print-enabled"; - public static final String GT_SAVE_DIR - = "extraction.gt-save-dir"; - public static final String IGNORE_PC_CHANGES - = "extraction.ignore-pc-changes"; - public static final String DATASET_FILE - = "diff-detective.dataset-file"; - public static final String DD_OUTPUT_DIR - = "diff-detective.output-dir"; - public static final String REPO_SAVE_DIR - = "diff-detective.repo-storage-dir"; - public static final String NUM_THREADS - = "diff-detective.num-threads"; - public static final String BATCH_SIZE - = "diff-detective.batch-size"; - public static final String EXTRACT_CODE_MATCHING - = "extraction.extract-code-matching"; + public static final String PRINT_ENABLED = "extraction.print-enabled"; + public static final String GT_SAVE_DIR = "extraction.gt-save-dir"; + public static final String IGNORE_PC_CHANGES = "extraction.ignore-pc-changes"; + public static final String DATASET_FILE = "diff-detective.dataset-file"; + public static final String DD_OUTPUT_DIR = "diff-detective.output-dir"; + public static final String REPO_SAVE_DIR = "diff-detective.repo-storage-dir"; + public static final String NUM_THREADS = "diff-detective.num-threads"; + public static final String BATCH_SIZE = "diff-detective.batch-size"; + public static final String PARSER = "diff-detective.parser"; + public static final String EXTRACT_CODE_MATCHING = "extraction.extract-code-matching"; + public static final String FILE_EXTENSIONS = "extraction.file-extensions"; } diff --git a/src/main/java/org/variantsync/vevos/extraction/GroundTruthExtraction.java b/src/main/java/org/variantsync/vevos/extraction/GroundTruthExtraction.java index 5a94cf3..5b2c8dd 100644 --- a/src/main/java/org/variantsync/vevos/extraction/GroundTruthExtraction.java +++ 
b/src/main/java/org/variantsync/vevos/extraction/GroundTruthExtraction.java @@ -5,6 +5,8 @@ import org.variantsync.diffdetective.datasets.PatchDiffParseOptions; import org.variantsync.diffdetective.datasets.Repository; import org.variantsync.diffdetective.diff.git.DiffFilter; +import org.variantsync.diffdetective.feature.AnnotationParser; +import org.variantsync.diffdetective.feature.PreprocessorAnnotationParser; import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParseOptions; import org.variantsync.vevos.extraction.gt.GroundTruth; @@ -123,6 +125,24 @@ public static Properties getProperties(File propertiesFile) { * @return The options instance */ public static AnalysisRunner.Options diffdetectiveOptions(Properties properties) { + final String[] allowedFileExtensions; + String propertyValue = properties.getProperty(FILE_EXTENSIONS); + if (propertyValue == null) { + final String[] defaultExtensions = {"h", "hpp", "c", "cpp"}; + allowedFileExtensions = defaultExtensions; + } else { + // Split on commas and tolerate whitespace around them (e.g. "c, cpp"); the separator must use \s because \w would consume the extensions themselves. + allowedFileExtensions = propertyValue.strip().split("\\s*,\\s*"); + } + + AnnotationParser parser; + // String.valueOf maps a missing property to "null" so that it is rejected by the default branch instead of causing a NullPointerException. + switch (String.valueOf(properties.getProperty(PARSER))) { + case "jpp" -> parser = PreprocessorAnnotationParser.JPPAnnotationParser; + case "cpp" -> parser = PreprocessorAnnotationParser.CPPAnnotationParser; + default -> throw new IllegalArgumentException("The parser " + properties.getProperty(PARSER) + + " is not supported. 
Choose between 'jpp' and 'cpp'"); + } return new AnalysisRunner.Options(Path.of(properties.getProperty(REPO_SAVE_DIR)), Path.of(properties.getProperty(DD_OUTPUT_DIR)), @@ -132,10 +152,9 @@ public static AnalysisRunner.Options diffdetectiveOptions(Properties properties) PatchDiffParseOptions.DiffStoragePolicy.DO_NOT_REMEMBER, new VariationDiffParseOptions( repoDefault.variationDiffParseOptions().annotationParser(), - false, false)); + false, false)).withAnnotationParser(parser); }, repo -> new DiffFilter.Builder().allowMerge(true) - // TODO: make configurable - .allowedFileExtensions("h", "hpp", "c", "cpp").build(), + .allowedFileExtensions(allowedFileExtensions).build(), true, false); } diff --git a/src/main/resources/lib/diffdetective-2.0.0-jar-with-dependencies.jar b/src/main/resources/lib/diffdetective-2.2.0-jar-with-dependencies.jar similarity index 96% rename from src/main/resources/lib/diffdetective-2.0.0-jar-with-dependencies.jar rename to src/main/resources/lib/diffdetective-2.2.0-jar-with-dependencies.jar index 109dcaf..36344ec 100644 Binary files a/src/main/resources/lib/diffdetective-2.0.0-jar-with-dependencies.jar and b/src/main/resources/lib/diffdetective-2.2.0-jar-with-dependencies.jar differ