Toggle navigation
Toggle navigation
This project
Loading...
Sign in
grogv3
/
grog-cubi
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Jean-Francois Leveque
2017-05-19 14:09:45 +0200
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
5831176936a95ae09752a2abe9519e56b3aea3e3
58311769
1 parent
fccb5869
Nettoyage des solitaires et non-sélection des solitaires pour les annotations.
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
84 additions
and
8 deletions
grog-recommendation/grog-recommendation-preprocess/src/main/java/org/legrog/recommendation/preprocess/PreprocessingRunner.java
grog-recommendation/grog-recommendation-preprocess/src/main/resources/logback.xml
grog-recommendation/grog-recommendation-preprocess/src/main/java/org/legrog/recommendation/preprocess/PreprocessingRunner.java
View file @
5831176
...
...
@@ -11,10 +11,7 @@ import org.springframework.boot.ApplicationRunner;
import
org.springframework.stereotype.Component
;
import
java.io.*
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Properties
;
import
java.util.Random
;
import
java.util.*
;
import
java.util.stream.Collectors
;
import
java.util.stream.StreamSupport
;
...
...
@@ -60,20 +57,98 @@ public class PreprocessingRunner implements ApplicationRunner {
loadParameters
();
setFilenames
();
List
<
AssociationElement
>
associationElements
=
loadAssociationElements
(
new
File
(
dataDir
,
completeFilename
));
List
<
Integer
>
annotateIndexes
=
chooseAnnotated
(
associationElements
.
size
());
associationElements
=
cleanupSmallCounts
(
associationElements
,
1
,
1
);
List
<
Integer
>
annotateIndexes
=
chooseAnnotated
(
associationElements
,
1
,
1
);
writeSampleAndAnnotated
(
new
File
(
dataDir
,
sampleFilename
),
new
File
(
dataDir
,
annontatedFilename
),
annotateIndexes
,
associationElements
);
}
private
List
<
Integer
>
chooseAnnotated
(
int
size
)
{
/**
 * Repeatedly removes associations belonging to sparsely connected items and users.
 *
 * <p>Each pass first drops every association whose item occurs in at most
 * {@code userSize} associations, then drops every association whose user occurs in at
 * most {@code itemSize} of the remaining associations. Because removing users can make
 * further items sparse (and vice versa), passes are repeated until one completes without
 * removing anything.
 *
 * <p>Ids are compared by value (set membership uses {@code equals}), never by reference.
 * NOTE(review): the getters' return type is not visible here; if they return boxed
 * {@code Long}, the previous {@code ==} / {@code !=} comparisons were identity checks and
 * silently failed for ids outside the small-integer cache.
 *
 * @param associationElements the associations to clean up; the list itself is not modified
 * @param userSize            inclusive upper bound on the number of associations an item
 *                            may have and still be removed
 * @param itemSize            inclusive upper bound on the number of associations a user
 *                            may have and still be removed
 * @return the input list if nothing had to be removed, otherwise a new list containing
 *         only the surviving associations
 */
private List<AssociationElement> cleanupSmallCounts(List<AssociationElement> associationElements, int userSize, int itemSize) {
    boolean removedUser;
    boolean removedItem;

    do {
        removedUser = false;
        removedItem = false;

        // Books or ratings are more alone than users, so we start with them
        // Count once per pass: removing one item's associations never changes the
        // count of another item, so a single tally is equivalent to recounting per id.
        final Map<Long, Long> userCountByItem = new HashMap<>();
        for (AssociationElement element : associationElements) {
            userCountByItem.merge(element.getItemId(), 1L, Long::sum);
        }
        final Set<Long> sparseItemIds = new HashSet<>();
        for (Map.Entry<Long, Long> entry : userCountByItem.entrySet()) {
            if (entry.getValue() <= userSize) {
                sparseItemIds.add(entry.getKey());
                if (!removedItem) {
                    removedItem = true;
                    logger.debug("Removed first item");
                }
                logger.trace("Removed item {}", entry.getKey());
            }
        }
        if (removedItem) {
            associationElements = associationElements.stream()
                    .filter(element -> !sparseItemIds.contains(element.getItemId()))
                    .collect(Collectors.toList());
        }
        logger.debug("Remaining AssociationElement count {}", associationElements.size());

        // Then we remove users, counted against what is left after the item pass
        final Map<Long, Long> itemCountByUser = new HashMap<>();
        for (AssociationElement element : associationElements) {
            itemCountByUser.merge(element.getUserId(), 1L, Long::sum);
        }
        final Set<Long> sparseUserIds = new HashSet<>();
        for (Map.Entry<Long, Long> entry : itemCountByUser.entrySet()) {
            if (entry.getValue() <= itemSize) {
                sparseUserIds.add(entry.getKey());
                if (!removedUser) {
                    removedUser = true;
                    logger.debug("Removed first user");
                }
                logger.trace("Removed user {}", entry.getKey());
            }
        }
        if (removedUser) {
            associationElements = associationElements.stream()
                    .filter(element -> !sparseUserIds.contains(element.getUserId()))
                    .collect(Collectors.toList());
        }
        logger.debug("Remaining AssociationElement count {}", associationElements.size());

        logger.debug("Remover item or user {}", removedUser || removedItem);
    } while (removedUser || removedItem);

    return associationElements;
}
private
List
<
Integer
>
chooseAnnotated
(
List
<
AssociationElement
>
associationElements
,
int
userSize
,
int
itemSize
)
{
List
<
Integer
>
annotatedChosen
=
new
ArrayList
<>();
int
size
=
associationElements
.
size
();
long
userCount
=
0
;
long
itemCount
=
0
;
AssociationElement
randomAssociationElement
;
AssociationElement
checkingAssociationElement
;
Random
random
=
new
Random
();
Integer
randomInteger
;
while
(
annotatedChosen
.
size
()
<
size
*
annotatePercent
/
100.0
)
{
randomInteger
=
new
Integer
(
random
.
nextInt
(
size
));
if
(!
annotatedChosen
.
contains
(
randomInteger
))
{
annotatedChosen
.
add
(
randomInteger
);
randomAssociationElement
=
associationElements
.
get
(
randomInteger
);
final
Long
itemId
=
randomAssociationElement
.
getItemId
();
final
Long
userId
=
randomAssociationElement
.
getUserId
();
userCount
=
associationElements
.
stream
().
filter
(
element
->
element
.
getItemId
()
==
itemId
).
count
();
itemCount
=
associationElements
.
stream
().
filter
(
element
->
element
.
getUserId
()
==
userId
).
count
();
// Decreasing values based on planned suppressions
// TODO Refactor writeSampleAndAnnotated and chooseAnnotated to avoid this
for
(
Integer
annotatedIndex
:
annotatedChosen
)
{
checkingAssociationElement
=
associationElements
.
get
(
annotatedIndex
);
if
(
checkingAssociationElement
.
getUserId
()
==
userId
)
{
userCount
--;
}
if
(
checkingAssociationElement
.
getItemId
()
==
itemId
)
{
itemCount
--;
}
}
if
(
userCount
>
userSize
&&
itemCount
>
itemSize
)
{
annotatedChosen
.
add
(
randomInteger
);
}
}
}
...
...
grog-recommendation/grog-recommendation-preprocess/src/main/resources/logback.xml
View file @
5831176
...
...
@@ -11,6 +11,7 @@
<logger
name=
"org.legrog"
level=
"DEBUG"
/>
<logger
name=
"org.legrog.recommendation.preprocess"
level=
"TRACE"
/>
<logger
name=
"org.legrog.recommendation.preprocess.PreprocessingRunner"
level=
"DEBUG"
/>
<root
level=
"warn"
>
<appender-ref
ref=
"STDOUT"
/>
...
...
Please
register
or
login
to post a comment