Commit aac7175e authored by Christian Reuschling's avatar Christian Reuschling
Browse files

cache now deals with metadata | shell script sum23TestFitnessFunction

migrated to fish
parent ee5c578c
Pipeline #16695 passed with stages
in 59 seconds
......@@ -19,6 +19,9 @@ evaluateLastWinnersCount=42
showMonitorGui=true
# Whether to use an in-memory or a persistent cache. In the case of runtimePerformanceCheck=true, the caches are disabled
# Weak references for an in-memory cache. Can help with exploding memory consumption
......@@ -29,6 +32,9 @@ persistentCacheDir=
threadCount=1
# GenIe first performs a loop over all given values of all parameters, whereby the default values of the other parameters stay fixed. The default
# value of a parameter is the first one specified in the list of possible values. Here you can choose if, after this simple loop, the succeeding
# genetic run will be skipped or not
......@@ -45,6 +51,10 @@ skipGeneticRun=false
# combination.
independentParamNames=
# If true, the system will produce the same results in succeeding calls. (This holds apart from uncontrollable side effects, e.g. the sort order of identical
# vector fitness values)
constantRandomSeed=true
......@@ -81,6 +91,10 @@ epochLength=-1
#The number of individuals that will be migrated from each island at the end of each epoch. Ignored if the island count is <=1
migrationCount=-1
# Specifies if a lower fitness value is better or vice versa
lowerScoreIsBetter=false
# The optimization process will halt after a specified number of generations passes without improvement in the population's fittest individual
......@@ -92,6 +106,9 @@ maxDurationSecondsBreak=1555200
targetFitnessBreak=1
# Each candidate vector entry gets a name and a list of possible values. These are all values GenIe will check. In the case you have number
# parameters, specify a list of discrete values from your number range
paramNames2DiscreteValSpaces=
......@@ -114,10 +131,10 @@ paramNames2DiscreteValSpaces=
}
# A fitness function can attach metadata to a candidate vector, together with it's calculated fitness. Further, the fitness function recieves a
# A fitness function can attach metadata to a candidate vector, together with its calculated fitness. Further, the fitness function receives a
# candidate vector together with the attached metadata from its parent vectors. GenIe doesn't do anything with this metadata, it is just a possibility
# for fitness functions. The first generation of candidate vectors doesn't have any parents. Thus you can specify the parents metadata for these
# vectors manually
# to do something special for fitness functions. The first generation of candidate vectors doesn't have any parents. Thus you can specify the parents
# metadata for these vectors manually
firstGenerationParentMetadata=
{
# Double key entries are not allowed
......@@ -132,7 +149,7 @@ firstGenerationParentMetadata=
# parent population candidates for population based training. This entry can also be empty, by random
eliteMetadataCount=3
# The fitnessScore of a candidate vector can be potentially calculated by the cost function with this information: 1. candidate vector entries,
# The fitnessScore of a candidate vector can be potentially calculated by the cost function with this data: 1. candidate vector entries,
# 2. metadata parts from the parents, 3. metadata parts from the population elite, 4. metadata parts from the randomly selected elite candidate (if picked).
# For cache lookup, GenIe needs to know all relevant parts beside the vector entries, that are somehow relevant for fitnessScore calculation.
# If not specified correctly, GenIe picks formerly calculated fitnessScores by the vector entries only, which can lead to wrong values
......@@ -142,6 +159,9 @@ eliteMetadataAtts4cacheLookup=
selectedEliteMetadataAtts4cacheLookup=
# For using GenIe standalone, you can specify an exec call for the fitness function. This executable will be called for each candidate vector
# evaluation, receiving the candidate vector values as invocation arguments (just strings from the configured value range), or over stdIn if enabled.
# The second-to-last invocation argument will be the parents metadata (json), the last argument the metadata from the top N candidate vectors of the parent
......@@ -172,7 +192,7 @@ consideredTailLength=10000
# ...
# ]
# }
inputOverStdIn=false
inputOverStdIn=true
# The tail of the fitness function output should be logged
logExecOutputTail=true
......
......@@ -136,8 +136,8 @@ public class GenIe extends GeneticParamOptimizer
FitnessFunctionStdInInput stdInInput =
new FitnessFunctionStdInInput().setCandidateVectorParamNames(candidate.getCandidateVectorParamNames()).setCandidateVector(candidate.getCandidateVector())
.setParentsMetadata(candidate.getParentMetadata()).setEliteMetadata(candidate.getParentsGenEliteMetadata())
.setSelectedEliteMetadata(candidate.getParentsGenSelectedEliteMetadata());
.setParentsMetadata(candidate.getParentMetadata()).setEliteMetadata(candidate.getParentsGenEliteMetadata()).setSelectedEliteMetadata(
candidate.getParentsGenSelectedEliteMetadata() == null ? null : new HashMap<>(candidate.getParentsGenSelectedEliteMetadata()));
String strJson4StdInInput = JsonWriter.objectToJson(stdInInput, CollectionUtilz.createHashMap(JsonWriter.TYPE, false, JsonWriter.PRETTY_PRINT, true));
outputStreamWriter.write(strJson4StdInInput);
outputStreamWriter.close();
......
......@@ -78,7 +78,7 @@ public class CachingFitnessEvaluatorWrapper implements FitnessEvaluator<Candidat
@SuppressWarnings({"rawtypes", "unchecked"})
@SuppressWarnings({"rawtypes", "unchecked", "CommentedOutCode"})
protected TwoValuesBox<Double, HashMap<String, String>> cacheJsonRead(String strJsonFromCacheFile)
{
......@@ -124,24 +124,6 @@ public class CachingFitnessEvaluatorWrapper implements FitnessEvaluator<Candidat
// TODO:
// DONE 1. Metadaten mit in den Cache. Ein Vektor hat die ja noch nicht, bekommt sie von der FitnessFunktion, also auch vom Cache. Sprich: diese Methode setzt die Metadaten?
// Wie ist das bisher? => die Fitnessfunction setzt fitness + metadaten. Das muß hier auch der Fall sein => Metadaten mit cachen
// DONE 2. FileNames können je nach VektorWerten zu komplex werden. Sprechend FileNames sind aber auch schick. Konfigurieren? Grep geht aber auch. Prüfsumme? Prüfsumme + die
// ersten NChars vom Vektor? Prüfsumme: darin alle relevanten MetadatenAttVals + Vektor verwursten (parentMetadataAtts4cacheLookup, eliteMetadataAtts4cacheLookup,
// selectedEliteEntryMetadataAtts4cacheLookup => noch testen
// Beachte:
// * im StringKeyCache stehen die FileNames als keys (so wird der initialisiert) => passt
// * der cache kann nicht disabled werden ? Ist egal, wenn der funzt ist der immer gut? Möglichkeit zum disablen aber immer gut => wenn empty dir => disabled
// DONE 3. Soo, mit Christoph besprochen:
// * Auswahl des topN-Metadatums erfolgt in GenIe, damit wird fitnessfunktionInput=>fitnessValue deterministisch und somit cachebar
// => das selektierte Metadatum wird der fitness function übergeben, oder keins!
// * es wird konfiguriert, welches metadatumAtt cacherelevant ist
// * CacheKey immer Vektor+cacherelevantes MetadatumAtt aus dem selektierten TopNMetadatum
// hierwarich. Alles implemntiert? Doku muß noch min ins Wiki
try
{
Double dFitness = null;
......@@ -162,8 +144,7 @@ public class CachingFitnessEvaluatorWrapper implements FitnessEvaluator<Candidat
dFitness = fitness2Metadata.getFirst();
candidate.setMetadata(fitness2Metadata.getSecond());
}
else
dFitness = null;
}
else
{
......@@ -242,7 +223,7 @@ public class CachingFitnessEvaluatorWrapper implements FitnessEvaluator<Candidat
for (Object strVectorEntry : (List<Object>) candidate)
strbCsvLine.append(strVectorEntry.toString()).append(",");
else
strbCsvLine.append(candidate).append(","); // TODO
strbCsvLine.append(candidate.toString().replace('\n', ' ').replace(',', ';')).append(",");
strbCsvLine.append(m_dateFormat.format(new Timestamp(System.currentTimeMillis()))).append(",");
strbCsvLine.append(lCalcTime).append(",").append(dFitness).append(",");
......@@ -430,8 +411,6 @@ public class CachingFitnessEvaluatorWrapper implements FitnessEvaluator<Candidat
try
{
String strKey = pCacheFile.getFileName().toString();
// Double dValue = Double.valueOf(new String(Files.readAllBytes(pCacheFile)));
// m_hsStringKeyCache.put(strKey, dValue);
TwoValuesBox<Double, HashMap<String, String>> fitness2Metadata = cacheJsonRead(new String(Files.readAllBytes(pCacheFile)));
m_hsStringKeyCache.put(strKey, new CacheEntry().setFitness(fitness2Metadata.getFirst()).setMetadata(fitness2Metadata.getSecond()));
......
......@@ -50,7 +50,7 @@ public class CandidateVectorWithMetadata
* cost function for information. Note that the cache lookup keys will be generated according to the candidate's vector entries plus selected metadata attribute entries out of
* this member variable
*/
HashMap<String, String> m_hsParentsGenSelectedEliteMetadata = new HashMap<>();
HashMap<String, String> m_hsParentsGenSelectedEliteMetadata;
/**
* Will be set by the system (by crossover.mate(..)). The scores of the parents in the same order as the parents metadata. Will be given to the cost function for information
*/
......
......@@ -18,16 +18,16 @@ public class Sum23Test
public static void main(String[] args)
{
CandidateVectorWithMetadata bestVector = new GenIe().setFitnessCalculator((candidate, population) -> {
int iSum = candidate.getCandidateVector().stream().mapToInt(Integer::valueOf).sum();
if (iSum == 23)
return 0.9d;
return 0d;
}).optimizeParams("src/test/java/de/dfki/sds/genie/geneticOptimization.conf");
// CandidateVectorWithMetadata bestVector = new GenIe().optimizeParams("src/test/java/de/dfki/sds/genie/geneticOptimization.conf");
// CandidateVectorWithMetadata bestVector = new GenIe().setFitnessCalculator((candidate, population) -> {
// int iSum = candidate.getCandidateVector().stream().mapToInt(Integer::valueOf).sum();
//
// if (iSum == 23)
// return 1.0d;
//
// return 0d;
// }).optimizeParams("src/test/java/de/dfki/sds/genie/geneticOptimization.conf");
CandidateVectorWithMetadata bestVector = new GenIe().optimizeParams("src/test/java/de/dfki/sds/genie/geneticOptimization.conf");
}
}
......@@ -7,7 +7,7 @@
runtimePerformanceCheck=false
# Show more log messages
verbose=false
verbose=true
# If an exception was thrown during fitness calculation, the optimization process won't be interrupted, just a log message will be shown
interruptIfException=true
......@@ -19,6 +19,9 @@ evaluateLastWinnersCount=42
showMonitorGui=true
# Whether to use an in-memory or a persistent cache. In the case of runtimePerformanceCheck=true, the caches are disabled
# Weak references for an in-memory cache. Can help with exploding memory consumption
......@@ -29,6 +32,9 @@ persistentCacheDir=
threadCount=1
# GenIe first performs a loop over all given values of all parameters, whereby the default values of the other parameters stay fixed. The default
# value of a parameter is the first one specified in the list of possible values. Here you can choose if, after this simple loop, the succeeding
# genetic run will be skipped or not
......@@ -45,6 +51,10 @@ skipGeneticRun=false
# combination.
independentParamNames=
# If true, the system will produce the same results in succeeding calls. (This holds apart from uncontrollable side effects, e.g. the sort order of identical
# vector fitness values)
constantRandomSeed=true
......@@ -81,6 +91,10 @@ epochLength=-1
#The number of individuals that will be migrated from each island at the end of each epoch. Ignored if the island count is <=1
migrationCount=-1
# Specifies if a lower fitness value is better or vice versa
lowerScoreIsBetter=false
# The optimization process will halt after a specified number of generations passes without improvement in the population's fittest individual
......@@ -92,6 +106,9 @@ maxDurationSecondsBreak=1555200
targetFitnessBreak=1.1
# Each candidate vector entry gets a name and a list of possible values. These are all values GenIe will check. In the case you have number
# parameters, specify a list of discrete values from your number range
paramNames2DiscreteValSpaces=
......@@ -114,10 +131,10 @@ paramNames2DiscreteValSpaces=
}
# A fitness function can attach metadata to a candidate vector, together with it's calculated fitness. Further, the fitness function recieves a
# A fitness function can attach metadata to a candidate vector, together with its calculated fitness. Further, the fitness function receives a
# candidate vector together with the attached metadata from its parent vectors. GenIe doesn't do anything with this metadata, it is just a possibility
# for fitness functions. The first generation of candidate vectors doesn't have any parents. Thus you can specify the parents metadata for these
# vectors manually
# to do something special for fitness functions. The first generation of candidate vectors doesn't have any parents. Thus you can specify the parents
# metadata for these vectors manually
firstGenerationParentMetadata=
{
# Double key entries are not allowed
......@@ -132,7 +149,7 @@ firstGenerationParentMetadata=
# parent population candidates for population based training. This entry can also be empty, by random
eliteMetadataCount=3
# The fitnessScore of a candidate vector can be potentially calculated by the cost function with this information: 1. candidate vector entries,
# The fitnessScore of a candidate vector can be potentially calculated by the cost function with this data: 1. candidate vector entries,
# 2. metadata parts from the parents, 3. metadata parts from the population elite, 4. metadata parts from the randomly selected elite candidate (if picked).
# For cache lookup, GenIe needs to know all relevant parts beside the vector entries, that are somehow relevant for fitnessScore calculation.
# If not specified correctly, GenIe picks formerly calculated fitnessScores by the vector entries only, which can lead to wrong values
......@@ -142,6 +159,9 @@ eliteMetadataAtts4cacheLookup=
selectedEliteMetadataAtts4cacheLookup=
# For using GenIe standalone, you can specify an exec call for the fitness function. This executable will be called for each candidate vector
# evaluation, receiving the candidate vector values as invocation arguments (just strings from the configured value range), or over stdIn if enabled.
# The second-to-last invocation argument will be the parents metadata (json), the last argument the metadata from the top N candidate vectors of the parent
......@@ -172,7 +192,7 @@ consideredTailLength=10000
# ...
# ]
# }
inputOverStdIn=false
inputOverStdIn=true
# The tail of the fitness function output should be logged
logExecOutputTail=true
......
#!/usr/bin/tcsh
#!/usr/bin/fish
if ( "$1" != "" ) then
if set -q argv[1]
# argument defined - we assume command parameter input
echo Candidate vector to evaluate: $1,$2,$3,$4
echo ParentsMetadata: $5
echo TopNParentsGenerationMetadata: $6
set summandA = $1
set summandB = $2
set summandC = $3
set summandD = $4
else
echo Candidate vector to evaluate: $argv[1],$argv[2],$argv[3],$argv[4]
echo ParentsMetadata: $argv[5]
echo TopNParentsGenerationMetadata: $argv[6]
set summandA $argv[1]
set summandB $argv[2]
set summandC $argv[3]
set summandD $argv[4]
else
# no argument defined - we assume json input from stdIn
# weird that we lose newlines here due to tcsh backticks
set stdInput = `cat`
#echo $stdInput
set summandA = `echo $stdInput | jq -r '.candidateVector[0]'`
set summandB = `echo $stdInput | jq -r '.candidateVector[1]'`
set summandC = `echo $stdInput | jq -r '.candidateVector[2]'`
set summandD = `echo $stdInput | jq -r '.candidateVector[3]'`
set stdInput (cat)
set summandA (echo $stdInput | jq -r '.candidateVector[0]')
set summandB (echo $stdInput | jq -r '.candidateVector[1]')
set summandC (echo $stdInput | jq -r '.candidateVector[2]')
set summandD (echo $stdInput | jq -r '.candidateVector[3]')
echo VectorParamNames: `echo $stdInput | jq '.candidateVectorParamNames'`
echo ParentsMetadata: `echo $stdInput | jq '.parentsMetadata'`
echo TopNParentsGenerationMetadata: `echo $stdInput | jq '.eliteMetadata'`
endif
echo Candidate vector to evaluate: $summandA,$summandB,$summandC,$summandD
echo VectorParamNames: (echo $stdInput | jq '.candidateVectorParamNames')
echo ParentsMetadata: (echo $stdInput | jq '.parentsMetadata')
echo TopNParentsGenerationMetadata: (echo $stdInput | jq '.eliteMetadata')
end
# some random metadata
set metadata = '{"shellVarNameExample":"shellVarValueExample","givenVector":"'$summandA','$summandB','$summandC','$summandD'"}'
# some arbitrary metadata
set metadata '{"shellVarNameExample":"shellVarValueExample","givenVector":"'$summandA','$summandB','$summandC','$summandD'"}'
# we check if the sum is 23
@ sum = $summandA + $summandB + $summandC + $summandD
set sum (math $summandA + $summandB + $summandC + $summandD)
if ( $sum == 23) then
if test $sum -eq 23
# the sum is 23 - we have a match
echo '{"fitness":0.9,"metadata":'"$metadata"'}'
echo '{"fitness":1.0,"metadata":'"$metadata"'}'
else
# the sum is not 23 - we give zero points
echo '{"fitness":0,"metadata":'"$metadata"'}'
endif
end
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment