Jean-Francois Leveque

Ajout paramétrage seuil d'annotation

......@@ -51,6 +51,7 @@ public class PreprocessingRunner implements ApplicationRunner {
private Boolean ratings;
private float annotatePercent;
private int annotateThreshold;
@Override
public void run(ApplicationArguments applicationArguments) throws Exception {
......@@ -58,7 +59,7 @@ public class PreprocessingRunner implements ApplicationRunner {
setFilenames();
List<AssociationElement> associationElements = loadAssociationElements(new File(dataDir, completeFilename));
// associationElements = cleanupSmallCounts(associationElements, 1, 1);
List<AssociationElement> annotableElements = removeFirstSmallCounts(associationElements, 1, 1);
List<AssociationElement> annotableElements = removeFirstSmallCounts(associationElements);
List<Integer> annotateIndexes = chooseAnnotated(associationElements, annotableElements, 1, 1);
writeSampleAndAnnotated(new File(dataDir, sampleFilename), new File(dataDir, annontatedFilename), annotateIndexes, associationElements, annotableElements);
}
......@@ -116,7 +117,7 @@ public class PreprocessingRunner implements ApplicationRunner {
return associationElements;
}
private List<AssociationElement> removeFirstSmallCounts(List<AssociationElement> associationElements, int userSize, int itemSize) {
private List<AssociationElement> removeFirstSmallCounts(List<AssociationElement> associationElements) {
boolean removedUser;
boolean removedItem;
......@@ -132,7 +133,7 @@ public class PreprocessingRunner implements ApplicationRunner {
itemIdSet = associationElements.stream().map(element -> element.getItemId()).collect(Collectors.toSet());
for (Long itemId : itemIdSet) {
userCount = associationElements.stream().filter(element -> element.getItemId() == itemId).count();
if (userCount <= userSize) {
if (userCount <= annotateThreshold) {
associationElements = associationElements.stream().filter(element -> element.getItemId() != itemId).collect(Collectors.toList());
if (!removedItem) {
removedItem = true;
......@@ -149,7 +150,7 @@ public class PreprocessingRunner implements ApplicationRunner {
userIdSet = associationElements.stream().map(element -> element.getUserId()).collect(Collectors.toSet());
for (Long userId : userIdSet) {
itemCount = associationElements.stream().filter(element -> element.getUserId() == userId).count();
if (itemCount <= itemSize) {
if (itemCount <= annotateThreshold) {
associationElements = associationElements.stream().filter(element -> element.getUserId() != userId).collect(Collectors.toList());
if (!removedUser) {
removedUser = true;
......@@ -314,6 +315,11 @@ public class PreprocessingRunner implements ApplicationRunner {
annotatePercent = 1.0f;
}
if (properties.containsKey("annotateThreshold")) {
annotateThreshold = Integer.parseInt(properties.getProperty("annotateThreshold"));
} else {
annotateThreshold = 1;
}
} catch (IOException e) {
throw new PreprocessingException("Can't read parameters properties file " + dataDir + parametersFilename, e);
}
......