Commit 4dc624e3 authored by Kerstin Bach's avatar Kerstin Bach
Browse files

Merge branch 'deagledxd/mycbr-sdk-mycbr-3.0.1' into mycbr-3.0.1

Resolved Conflicts:
	src/test/junittest/similarity/StringFunctionTest.java
parents 87a41054 a73fbeec
......@@ -185,12 +185,16 @@ public class DateDesc extends SimpleAttDesc {
* the description for which a new function should be created
* @return the Similarity.getFunction for description desc
*/
public DateFct addDateFct(String name, boolean active) {
DateFct f = new DateFct(owner.getProject(), this, name);
public DateFct addDateFct(String name, boolean active, DateFct.DateFunctionPrecision adjust) {
DateFct f = new DateFct(owner.getProject(), this, name, adjust);
addFunction(f, active);
return f;
}
public DateFct addDateFct(String name, boolean active) {
return addDateFct(name, active, DateFct.DateFunctionPrecision.Year);
}
/*
* (non-Javadoc)
......@@ -265,7 +269,7 @@ public class DateDesc extends SimpleAttDesc {
void addDefaultFct() {
if (owner != null && owner != owner.getProject()) {
ISimFct activeSim = addDateFct(Project.DEFAULT_FCT_NAME, false);
ISimFct activeSim = addDateFct(Project.DEFAULT_FCT_NAME, false, DateFct.DateFunctionPrecision.Day);
updateAmalgamationFcts(owner, activeSim);
}
}
......
......@@ -28,12 +28,13 @@
package de.dfki.mycbr.core.similarity;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Observable;
import de.dfki.mycbr.core.Project;
import de.dfki.mycbr.core.casebase.Attribute;
import de.dfki.mycbr.core.casebase.MultipleAttribute;
import de.dfki.mycbr.core.casebase.SpecialAttribute;
import de.dfki.mycbr.core.casebase.*;
import de.dfki.mycbr.core.model.AttributeDesc;
import de.dfki.mycbr.core.model.DateDesc;
import de.dfki.mycbr.core.similarity.config.MultipleConfig;
......@@ -46,15 +47,19 @@ import de.dfki.mycbr.core.similarity.config.MultipleConfig;
*/
public class DateFct extends Observable implements ISimFct {
public enum DateFunctionPrecision { Second, Minute, Hour, Day, Month, Year }
private Project prj;
private String name;
private DateDesc desc;
protected MultipleConfig mc = MultipleConfig.DEFAULT_CONFIG;
public DateFct(Project prj,DateDesc desc, String name) {
private DateFunctionPrecision precision;
public DateFct(Project prj,DateDesc desc, String name, DateFunctionPrecision precision) {
this.prj = prj;
this.desc = desc;
this.name = name;
this.precision = precision;
}
/* (non-Javadoc)
......@@ -67,7 +72,74 @@ public class DateFct extends Observable implements ISimFct {
return prj.calculateSpecialSimilarity(value1, value2);
} else if (value1 instanceof MultipleAttribute<?> && value2 instanceof MultipleAttribute<?>) {
return prj.calculateMultipleAttributeSimilarity(this, (MultipleAttribute<?>)value1, (MultipleAttribute<?>)value2);
} else {
} else if (value1 instanceof DateAttribute && value2 instanceof DateAttribute) {
DateAttribute dateAttr1 = (DateAttribute) value1;
DateAttribute dateAttr2 = (DateAttribute) value2;
Date date1 = dateAttr1.getDate();
Date date2 = dateAttr2.getDate();
double result = 0.0;
Calendar calendar1 = GregorianCalendar.getInstance();
calendar1.setTime(date1);
Calendar calendar2 = GregorianCalendar.getInstance();
calendar2.setTime(date2);
switch (this.precision) {
case Second:
if (calendar1.get(Calendar.MINUTE) == calendar2.get(Calendar.MINUTE)
&& calendar1.get(Calendar.HOUR_OF_DAY) == calendar2.get(Calendar.HOUR_OF_DAY)
&& calendar1.get(Calendar.DAY_OF_MONTH) == calendar2.get(Calendar.DAY_OF_MONTH)
&& calendar1.get(Calendar.MONTH) == calendar2.get(Calendar.MONTH)
&& calendar1.get(Calendar.YEAR) == calendar2.get(Calendar.YEAR)) {
int secondsDate1 = calendar1.get(Calendar.SECOND);
int secondsDate2 = calendar2.get(Calendar.SECOND);
result = 1 - (Math.abs(secondsDate2 - secondsDate1) / 60.0);
}
break;
case Minute:
if (calendar1.get(Calendar.HOUR_OF_DAY) == calendar2.get(Calendar.HOUR_OF_DAY)
&& calendar1.get(Calendar.DAY_OF_MONTH) == calendar2.get(Calendar.DAY_OF_MONTH)
&& calendar1.get(Calendar.MONTH) == calendar2.get(Calendar.MONTH)
&& calendar1.get(Calendar.YEAR) == calendar2.get(Calendar.YEAR)) {
int minutesDate1 = calendar1.get(Calendar.MINUTE);
int minutesDate2 = calendar2.get(Calendar.MINUTE);
result = 1 - (Math.abs(minutesDate2 - minutesDate1) / 60.0);
}
break;
case Hour:
if (calendar1.get(Calendar.DAY_OF_MONTH) == calendar2.get(Calendar.DAY_OF_MONTH)
&& calendar1.get(Calendar.MONTH) == calendar2.get(Calendar.MONTH)
&& calendar1.get(Calendar.YEAR) == calendar2.get(Calendar.YEAR)) {
int hoursDate1 = calendar1.get(Calendar.HOUR_OF_DAY);
int hoursDate2 = calendar2.get(Calendar.HOUR_OF_DAY);
result = 1 - (Math.abs(hoursDate2 - hoursDate1) / 24.0);
}
break;
case Day:
if (calendar1.get(Calendar.MONTH) == calendar2.get(Calendar.MONTH)
&& calendar1.get(Calendar.YEAR) == calendar2.get(Calendar.YEAR)) {
int daysDate1 = calendar1.get(Calendar.DAY_OF_MONTH);
int daysDate2 = calendar2.get(Calendar.DAY_OF_MONTH);
result = 1 - (Math.abs(daysDate2 - daysDate1) / 31.0); // Muss bzgl. Schaltjahr angepasst werden
}
break;
case Month:
if (calendar1.get(Calendar.YEAR) == calendar2.get(Calendar.YEAR)) {
int monthsDate1 = calendar1.get(Calendar.MONTH);
int monthsDate2 = calendar2.get(Calendar.MONTH);
result = 1 - (Math.abs(monthsDate2 - monthsDate1) / 12.0);
}
break;
case Year:
int yearsDate1 = calendar1.get(Calendar.YEAR);
int yearsDate2 = calendar2.get(Calendar.YEAR);
result = 1 - (Math.abs(yearsDate2 - yearsDate1) / 100.0); // Konfigurierbar machen
break;
}
return Similarity.get(result);
} else {
return Similarity.INVALID_SIM;
}
......@@ -81,6 +153,12 @@ public class DateFct extends Observable implements ISimFct {
return desc;
}
/* (non-Javadoc)
*/
public DateFunctionPrecision getPrecision() {
return precision;
}
/* (non-Javadoc)
* @see de.dfki.mycbr.core.similarity.ISimFct#getMultipleConfig()
*/
......@@ -159,9 +237,12 @@ public class DateFct extends Observable implements ISimFct {
@Override
public void clone(AttributeDesc descNEW, boolean active) {
if (descNEW instanceof DateDesc && !name.equals(Project.DEFAULT_FCT_NAME)) {
DateFct f = ((DateDesc)descNEW).addDateFct(name, active);
DateFct f = ((DateDesc)descNEW).addDateFct(name, active, this.precision);
f.mc = this.mc;
}
}
public void setPrecision(DateFunctionPrecision adj) {
this.precision = adj;
}
}
......@@ -27,6 +27,7 @@
* endOfLic */
package de.dfki.mycbr.core.similarity;
import java.io.Console;
import java.util.HashSet;
import java.util.Observable;
......@@ -41,31 +42,94 @@ import de.dfki.mycbr.core.similarity.config.MultipleConfig;
import de.dfki.mycbr.core.similarity.config.StringConfig;
/**
* Calculates the similarity of two strings
* for a given description. At the moment only syntactic equality
* is supported.
* Calculates the similarity of two strings for a given description. At the
* moment only syntactic equality is supported.
*
* @author myCBR Team
*/
*/
public class StringFct extends Observable implements ISimFct {
private StringDesc desc;
private StringConfig config;
// if config == NGRAM
private int n = 3;
private int levenshteinDelCost = 1;
private int levenshteinAddCost = 1;
private int levenshteinChangeCost = 1;
/**
* @return the levenshteinDelCost
*/
public int getLevenshteinDelCost() {
return levenshteinDelCost;
}
/**
* @param levenshteinDelCost
* the levenshteinDelCost to set
*/
public void setLevenshteinDelCost(int levenshteinDelCost) {
if (levenshteinDelCost < 0) {
throw new UnsupportedOperationException ();
} else {
this.levenshteinDelCost = levenshteinDelCost;
}
}
/**
* @return the levenshteinAddCost
*/
public int getLevenshteinAddCost() {
return levenshteinAddCost;
}
/**
* @param levenshteinAddCost
* the levenshteinAddCost to set
*/
public void setLevenshteinAddCost(int levenshteinAddCost) {
if (levenshteinAddCost < 0) {
throw new UnsupportedOperationException ();
} else {
this.levenshteinAddCost = levenshteinAddCost;
}
}
/**
* @return the levenshteinChangeCost
*/
public int getLevenshteinChangeCost() {
return levenshteinChangeCost;
}
/**
* @param levenshteinChangeCost
* the levenshteinChangeCost to set
*/
public void setLevenshteinChangeCost(int levenshteinChangeCost) {
if (levenshteinChangeCost < 0) {
throw new UnsupportedOperationException ();
} else {
this.levenshteinChangeCost = levenshteinChangeCost;
}
}
private boolean caseSensitive = true;
private String name;
private Project prj;
private MultipleConfig mc = MultipleConfig.DEFAULT_CONFIG;
/**
* Initializes this with the given description.
* @param desc the description of the
*
* @param desc
* the description of the
*/
public StringFct (Project prj, StringConfig config, StringDesc desc, String name) {
public StringFct(Project prj, StringConfig config, StringDesc desc,
String name) {
this.prj = prj;
this.desc = desc;
this.config = config;
......@@ -73,88 +137,153 @@ public class StringFct extends Observable implements ISimFct {
}
/**
* Calculates the similarity of the given attributes.
* Returns null if an error occurs.
* @return 1.00 if the given attributes are equal, invalid similarity if
* an error occurs, else 0.00
* @throws Exception
* Calculates the similarity of the given attributes. Returns null if an
* error occurs.
*
* @return 1.00 if the given attributes are equal, invalid similarity if an
* error occurs, else 0.00
* @throws Exception
*/
@Override
public Similarity calculateSimilarity(Attribute value1, Attribute value2) throws Exception {
if (value1 instanceof MultipleAttribute<?> && value2 instanceof MultipleAttribute<?>) {
return prj
.calculateMultipleAttributeSimilarity(this,((MultipleAttribute<?>)value1), (MultipleAttribute<?>)value2);
} else if (!( value1 instanceof SpecialAttribute || value2 instanceof SpecialAttribute )) {
String v1 = ((StringAttribute)value1).toString();
String v2 = ((StringAttribute)value2).toString();
public Similarity calculateSimilarity(Attribute value1, Attribute value2)
throws Exception {
if (value1 instanceof MultipleAttribute<?>
&& value2 instanceof MultipleAttribute<?>) {
return prj.calculateMultipleAttributeSimilarity(this,
((MultipleAttribute<?>) value1),
(MultipleAttribute<?>) value2);
} else if (!(value1 instanceof SpecialAttribute || value2 instanceof SpecialAttribute)) {
String v1 = ((StringAttribute) value1).toString();
String v2 = ((StringAttribute) value2).toString();
if (!caseSensitive) {
v1 = v1.toLowerCase();
v2 = v2.toLowerCase();
}
switch(config) {
case EQUALITY: return v1.equals(v2) ? Similarity.get(1.00) : Similarity.get(0.00);
case NGRAM: return nGramSimilarity(v1,v2);
default: return Similarity.INVALID_SIM;
switch (config) {
case EQUALITY:
return v1.equals(v2) ? Similarity.get(1.00) : Similarity
.get(0.00);
case NGRAM:
return nGramSimilarity(v1, v2);
case LEVENSHTEIN:
return levenshteinSimilarity(v1, v2);
default:
return Similarity.INVALID_SIM;
}
} else {
return prj.calculateSpecialSimilarity(value1, value2);
}
}
/**
* Computes the NGRAM similarity of the given strings.
* The similarity results in the ration of common NGRAMs divided
* by all occurring NGRAMs. An NGRAM is a substring of length n
* of the given string.
* @param v1 the first string
* @param v2 the second string
* Computes the NGRAM similarity of the given strings. The similarity
* results in the ration of common NGRAMs divided by all occurring NGRAMs.
* An NGRAM is a substring of length n of the given string.
*
* @param v1
* the first string
* @param v2
* the second string
* @return the NGRAM similarity of the given strings
*/
@SuppressWarnings("unchecked")
private Similarity nGramSimilarity(String v1, String v2) {
HashSet<String> ngramsV1 = new HashSet<String>();
HashSet<String> ngramsV2 = new HashSet<String>();
// get all n grams of first string
for (int i = 0; i<v1.length()-n+1; i++) {
ngramsV1.add(v1.substring(i, i+n));
for (int i = 0; i < v1.length() - n + 1; i++) {
ngramsV1.add(v1.substring(i, i + n));
}
// get all n grams of second string
for (int i = 0; i<v2.length()-n+1; i++) {
ngramsV2.add(v2.substring(i, i+n));
for (int i = 0; i < v2.length() - n + 1; i++) {
ngramsV2.add(v2.substring(i, i + n));
}
// get # all occurring NGRAMS (without repetition)
HashSet<String> union = (HashSet<String>)ngramsV1.clone();
HashSet<String> union = (HashSet<String>) ngramsV1.clone();
union.addAll(ngramsV2);
float f2 = union.size();
// get # common NGRAMS (without repetition)
ngramsV2.retainAll(ngramsV1);
float f1 = ngramsV2.size();
// compute ratio
Similarity res = Similarity.get(new Double(f1/f2));
Similarity res = Similarity.get(new Double(f1 / f2));
return res;
}
private Similarity levenshteinSimilarity(String s0, String s1) {
// Source: http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Java
int len0 = s0.length() + 1;
int len1 = s1.length() + 1;
// the array of distances
int[] cost = new int[len0];
int[] newcost = new int[len0];
// initial cost of skipping prefix in String s0
for (int i = 0; i < len0; i++) cost[i] = i;
// dynamicaly computing the array of distances
// transformation cost for each letter in s1
for (int j = 1; j < len1; j++) {
// initial cost of skipping prefix in String s1
newcost[0] = j;
// transformation cost for each letter in s0
for(int i = 1; i < len0; i++) {
// matching current letters in both strings
int match = (s0.charAt(i - 1) == s1.charAt(j - 1)) ? 0 : getLevenshteinChangeCost();
// computing cost for each transformation
int cost_replace = cost[i - 1] + match;
int cost_insert = cost[i] + getLevenshteinAddCost();
int cost_delete = newcost[i - 1] + getLevenshteinDelCost();
// keep minimum cost
newcost[i] = Math.min(Math.min(cost_insert, cost_delete), cost_replace);
}
// swap cost/newcost arrays
int[] swap = cost;
cost = newcost;
newcost = swap;
}
double maxLength = s0.length() > s1.length() ? s0.length() : s1.length();
double result = 1 - (cost[len0 - 1] / maxLength);
Similarity sim = Similarity.get(result);
return sim;
}
/**
* Returns whether this function is symmetric.
* Equality and NGRAM similarity is always symmetric.
* Returns whether this function is symmetric. Equalitym, NGRAM and Levenshtein similarity
* is always symmetric.
*
* @return true
*/
public boolean isSymmetric() {
return true;
}
/* (non-Javadoc)
/*
* (non-Javadoc)
*
* @see de.dfki.mycbr.core.similarity.ISimFct#setSymmetric(boolean)
*/
@Override
......@@ -163,10 +292,12 @@ public class StringFct extends Observable implements ISimFct {
* Not implemented because at the moment only symmetric string
* functions are supported.
*/
public void setSymmetric(boolean symmetric) {}
public void setSymmetric(boolean symmetric) {
}
/**
* Returns the configuration of this function.
*
* @return the configuration describing how to compute similarity
*/
public StringConfig getConfig() {
......@@ -174,36 +305,42 @@ public class StringFct extends Observable implements ISimFct {
}
/**
* Returns the parameter n for computing NGRAM similarity.
* N specifies the length of the substrings to be compared.
* Returns the parameter n for computing NGRAM similarity. N specifies the
* length of the substrings to be compared.
*
* @return the parameter n
*/
public int getN() {
return n;
}
/**
* Sets the parameter n for computing NGRAM similarity.
* N specifies the length of the substrings to be compared.
* @param n the new parameter n
* Sets the parameter n for computing NGRAM similarity. N specifies the
* length of the substrings to be compared.
*
* @param n
* the new parameter n
*/
public void setN(int n) {
this.n = n;
setChanged();
notifyObservers();
}
/**
* Returns whether this function is case sensitive.
*
* @return true, if function is case sensitive, false otherwise
*/
public boolean isCaseSensitive() {
return caseSensitive;
}
/**
* Specifies whether this function is case sensitive.
* @param caseSensitive true, if function is case sensitive, false otherwise
*
* @param caseSensitive
* true, if function is case sensitive, false otherwise
*/
public void setCaseSensitive(boolean caseSensitive) {
if (caseSensitive != this.caseSensitive) {
......@@ -212,8 +349,10 @@ public class StringFct extends Observable implements ISimFct {
this.caseSensitive = caseSensitive;
notifyObservers();
}
/* (non-Javadoc)
/*
* (non-Javadoc)
*
* @see de.dfki.mycbr.core.similarity.ISimFct#getName()
*/
@Override
......@@ -225,7 +364,9 @@ public class StringFct extends Observable implements ISimFct {
return name;
}
/* (non-Javadoc)
/*
* (non-Javadoc)
*
* @see de.dfki.mycbr.core.similarity.ISimFct#setName(java.lang.String)
*/
@Override
......@@ -242,8 +383,10 @@ public class StringFct extends Observable implements ISimFct {
notifyObservers();
}
}
/* (non-Javadoc)
/*
* (non-Javadoc)
*
* @see de.dfki.mycbr.core.similarity.ISimFct#getDescription()
*/
@Override
......@@ -256,7 +399,9 @@ public class StringFct extends Observable implements ISimFct {
return desc;
}
/* (non-Javadoc)
/*
* (non-Javadoc)
*
* @see de.dfki.mycbr.core.similarity.ISimFct#getProject()
*/
@Override
......@@ -264,7 +409,9 @@ public class StringFct extends Observable implements ISimFct {
return prj;
}
/* (non-Javadoc)
/*
* (non-Javadoc)
*
* @see de.dfki.mycbr.core.similarity.ISimFct#getMultipleConfig()
*/
@Override
......@@ -272,7 +419,9 @@ public class StringFct extends Observable implements ISimFct {
return mc;
}
/* (non-Javadoc)