Commit f1d5f488 authored by reuschling's avatar reuschling
Browse files

bugfix: weaved internal data structures were not cleaned up correctly. This

led to OutOfMemory exceptions when exiting DynaQ with an open result
list with millions of result documents


git-svn-id: https://dynaq.opendfki.de/repos/application/trunk@1050 8b50620d-ddf8-0310-9f5d-8b2f31fd9ac4
parent 5b838a12
......@@ -36,8 +36,6 @@ import org.dynaq.index.LuceneIndexSet;
import org.dynaq.util.lucene.BooleanQueryWithMetaData;
import org.dynaq.util.lucene.BoostedQueriesExtractor;
import org.dynaq.util.swing.ColorFactory;
import org.dynaq.webservice.DynaQServer;
import org.kafkaRCP.ui.KafkaRCP;
import de.dfki.inquisition.exceptions.ExceptionUtils;
import de.dfki.inquisition.lucene.LuceneUtilz;
......@@ -628,6 +626,14 @@ public class DynaQQuery implements Serializable
/**
 * Drops the reference this query holds to its last calculated result list by setting it to null.
 */
public void clearResult()
{
    this.m_lastCalculatedResultList = null;
}
......
......@@ -36,6 +36,10 @@ public class DynaQClassWeaver
// cp /home/reuschling/.m2/repository/org/apache/lucene/lucene-core/4.9.0/*.jar .;find -type f -name '*.class' -print0 | xargs -0 zip -f lucene-core-4.9.0.jar;mvn
// install:install-file -Dfile=lucene-core-4.9.0.jar -Dsources=lucene-core-4.9.0-sources.jar -DgroupId=org.apache.lucene -DartifactId=lucene-core
// -Dversion=4.9.0-dynaq-patched -Dpackaging=jar -DgeneratePom=true
//
// mvn org.apache.maven.plugins:maven-deploy-plugin:2.7:deploy-file -DrepositoryId=artifactory2-libs-releases-local
// -Durl=http://www.dfki.uni-kl.de/artifactory/libs-releases-local -Dfile=lucene-core-4.9.0.jar -Dsources=lucene-core-4.9.0-sources.jar
// -DgroupId=org.apache.lucene -DartifactId=lucene-core -Dversion=4.9.0-dynaq-patched -Dpackaging=jar -DgeneratePom=true
}
......
......@@ -194,7 +194,7 @@ public class TermRelevancesCollector
}
// wir geben den nunmehr unnötigen Speicher wieder frei
enableTermRelevanceExplanations(query4Weight);
clearCollectedData(query4Weight);
}
......@@ -233,6 +233,41 @@ public class TermRelevancesCollector
/**
 * Releases the data that was collected on the given query. Sub-queries are handled first: for a BooleanQuery the query of every clause, for a
 * FilteredQuery the wrapped query - each of them recursively. Afterwards the collected data of the given query itself is cleared.
 *
 * @param query the query to clean up. If null, nothing happens
 *
 * @throws IllegalStateException if the query (or one of its sub-queries) is not a TermRelevancesQuery
 */
static public void clearCollectedData(Query query)
{
    if(query == null) return;

    // every query is expected to be a TermRelevancesQuery, because the superclass (Query) itself gets modified
    boolean bIsTermRelevancesQuery = query instanceof TermRelevancesQuery;
    if(!bIsTermRelevancesQuery)
    {
        String strMessage = "Query don't implements the TermRelevancesQuery interface, which is necessary for term relevance explanation:\n" + query
                + "\nPropably the Lucene classes are not weaved. Invoke 'DynaQ.weaveClasses();' before the Lucene classes are first used\n"
                + "(i.e. loaded by the class loader). For details, look at org.dynaq.core.weaving.TermRelevancesCollector\n" + query;

        throw new IllegalStateException(strMessage);
    }

    // descend into the clauses of a boolean query
    if(query instanceof BooleanQuery)
    {
        BooleanQuery booleanQuery = (BooleanQuery) query;

        for (BooleanClause subClause : booleanQuery.getClauses())
        {
            clearCollectedData(subClause.getQuery());
        }
    }

    // descend into the query wrapped by a filtered query
    if(query instanceof FilteredQuery)
    {
        Query wrappedQuery = ((FilteredQuery) query).getQuery();

        clearCollectedData(wrappedQuery);
    }

    // finally clear the data of this query itself
    TermRelevancesQuery termRelevancesQuery = (TermRelevancesQuery) query;
    termRelevancesQuery.clearCollectedData();
}
static CtConstructor getDeclaredConstructor(CtClass constructorClass, String strConstructorSignature)
{
......@@ -567,6 +602,9 @@ public class TermRelevancesCollector
queryClass.addMethod(CtNewMethod.make("public void enableTermRelevances(boolean bEnable){m_bEnableTermRelevances = bEnable;}", queryClass));
queryClass.addMethod(CtNewMethod.make("public boolean isTermRelevancesEnabled(){return m_bEnableTermRelevances;}", queryClass));
queryClass.addMethod(CtNewMethod.make("public void clearCollectedData(){m_hsDocNo2Score.clear();}", queryClass));
queryClass.addField(new CtField(classPool.get("java.lang.Float"), "m_fQueryNorm", queryClass), "null");
queryClass.addMethod(CtNewMethod.make("public void setNormalizationValue(java.lang.Float fQueryNorm){m_fQueryNorm = fQueryNorm;}", queryClass));
queryClass.addMethod(CtNewMethod.make("public java.lang.Float getNormalizationValue(){return m_fQueryNorm;}", queryClass));
......
......@@ -9,6 +9,10 @@ package org.dynaq.core.weaving;
public interface TermRelevancesQuery
{
public void clearCollectedData();
public void enableTermRelevances(boolean bEnable);
......
......@@ -57,6 +57,7 @@ import de.dfki.km.leech.metadata.LeechMetadata;
/**
* TODO: ist zwar hübsch, einen DynaQ-standalone-Server zu haben, doch ein deployen als war-file sollte auch möglich sein. mal abchecken, wie das so gehen würde
* Aaalso: der default-Konstruktor startet den Server nicht, also können wir diese Klasse als HandlerKlasse angeben (im delight Servlet)
*
*
* DynaQService implementation that acts as a standalone server. Because of the need of dynamically creating handlers for new indices and enabling them to be visible
......@@ -141,7 +142,7 @@ public class DynaQServer implements DynaQService, Runnable
*/
public DynaQServer() throws Exception
{
    // A constructor may contain only one explicit constructor invocation, and it has to come first.
    // The superseded 'this(-1)' delegation is therefore dropped in favour of the boolean overload.
    // NOTE(review): presumably 'false' means "do not start the service" - confirm against the overload.
    this(false);
}
......@@ -211,7 +212,8 @@ public class DynaQServer implements DynaQService, Runnable
/**
* Creates a new DynaQServer Object, starting the service
*
* @param iServicePort the port under which the service should be started. If -1 the configured port number will be used. Smaller than -1 means that the service will NOT start. Use this to easily read out the server related config files.
* @param iServicePort the port under which the service should be started. If -1 the configured port number will be used. Smaller than -1 means that the service will
* NOT start. Use this to easily read out the server related config files.
* @param bOnlyLocalhost true if this interface should only be accessible from localhost
*
* @throws Exception
......@@ -236,7 +238,6 @@ public class DynaQServer implements DynaQService, Runnable
Boolean bOnlyLocalhost = mvc.getFirstAsBoolean(AttributeConfig.ConfigAttributes.DYNAQSERVICE.SERVICE_ONLY_LOCALHOST_VISIBLE);
if(bOnlyLocalhost == null) bOnlyLocalhost = false;
// bei -1 wird irgendein freier port ermittelt. Dann gibts die chance, den mit zeroConf wieder zu finden
if(m_bStartService)
{
m_serverUri = DynaQServiceUtils.startDynaQServiceServer(m_servicePort, this, bOnlyLocalhost);
......@@ -664,7 +665,7 @@ public class DynaQServer implements DynaQService, Runnable
List<String> lUids = this.getDocumentAttributeValues(docIdAttName, docId, LeechMetadata.id, indexSetId);
if(lUids.isEmpty()) throw new IllegalStateException("document has no leech UID");
String docUId = lUids.iterator().next();
LinkedHashMap<String, Float> buzzwordsWithTfIdf =
Buzzwords.getBuzzwordsWithTfIdf(docUId, attNames4BuzzwordCalculation, maxNumberOfBuzzwords, skipSimilarTerms, luceneIndexSet.getIndexPaths());
......@@ -683,11 +684,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public LinkedHashMap<String, Float> getBuzzwordsWithTfIdf4IndexCorpus(String text, int maxNumberOfBuzzwords, boolean skipSimilarTerms, String indexSetId)
throws Exception
......@@ -734,8 +730,8 @@ public class DynaQServer implements DynaQService, Runnable
LinkedHashMap<String, Float> buzzwordsWithTfIdf =
Buzzwords.getBuzzwordsWithTfIdf(docUId, attNames4BuzzwordCalculation, maxNumberOfBuzzwords, skipSimilarTerms, luceneIndexSet.getIndexPaths());
return buzzwordsWithTfIdf;
}
......@@ -751,12 +747,12 @@ public class DynaQServer implements DynaQService, Runnable
List<String> lUids = this.getDocumentAttributeValues(docIdAttName, docId, LeechMetadata.id, indexSetId);
if(lUids.isEmpty()) throw new IllegalStateException("document has no leech UID");
String docUId = lUids.iterator().next();
LinkedHashMap<String, Float> buzzwordsWithTfIdf =
Buzzwords.getBuzzwordsWithTfIdf(docUId, attNames4BuzzwordCalculation, maxNumberOfBuzzwords, skipSimilarTerms, luceneIndexSet.getIndexPaths());
return buzzwordsWithTfIdf;
}
......@@ -872,7 +868,7 @@ public class DynaQServer implements DynaQService, Runnable
if(luceneIndexSet == null) throw new DynaQException("Index set '" + indexSetId + "' not known by DynaQService");
SimpleIndexInterface indexInterface = new SimpleIndexInterface(luceneIndexSet);
return indexInterface.getIndexingQueueSize();
}
......@@ -1410,8 +1406,8 @@ public class DynaQServer implements DynaQService, Runnable
RemoteIndexReader readerHandler = IndexAccessor.getMultiIndexReader(luceneIndexSet.getIndexPaths());
// das sollte eigentlich nicht der Fall sein, allerdings ist es geschickt beim developen, wenn der Server hier gestartet wird
//XXX hier hat er keinen Pfad, da das default-Servlet keine entsprechende Annotation hat. Muß das sein? Ich kommentiere es mal aus
// if(!DelightServerManager.isServerUpAndRunning(m_servicePort)) DelightServerManager.startServer( m_servicePort);
// XXX hier hat er keinen Pfad, da das default-Servlet keine entsprechende Annotation hat. Muß das sein? Ich kommentiere es mal aus
// if(!DelightServerManager.isServerUpAndRunning(m_servicePort)) DelightServerManager.startServer( m_servicePort);
URI readerUri = DelightServerManager.addHandler(m_servicePort, readerHandler, indexSetId + "_reader", false);
m_hsIndexSetID2IndexReaderHandler.put(indexSetId, readerHandler);
......
......@@ -130,145 +130,6 @@ public class DynaQServiceUtils
// /**
// * Returns an IndexReader client Object for a remote index set. This is for low-level Lucene programming.<br>
// * CAUTION: This convinience method only works with DynaQService Objects created with this interface, because the class remembers the ip address of each Object at
// * creation time. In the case this don't fits your needs, use the according method by specifying the indexSetReaders host URI.
// *
// * @param dynaQService the (remote) DynaQ Service which gives access to this indexSet
// * @param strIndexSetID an ID to identify the index set
// *
// * @return an IndexReader client Object for the given IndexSetID
// */
// static public IndexReader getIndexSetReader(DynaQService dynaQService, String strIndexSetID)
// {
//
// try
// {
//
// String strReaderUri = dynaQService.getIndexSetReaderURI(strIndexSetID);
//
// return getIndexSetReader(new URI(strReaderUri));
//
//
// }
// catch (URISyntaxException e)
// {
// Logger.getLogger(DynaQServiceUtils.class.getName()).log(Level.SEVERE, "", e);
//
// return null;
// }
// }
//
//
//
//
//
//
// /**
// * Returns an IndexReader client Object for a remote index set. This is for low-level Lucene programming.<br>
// *
// * @param indexSetReaderHostUri the URI this service can be accessed
// *
// * @return an IndexReader client Object for the given IndexSetID
// */
// static public IndexReader getIndexSetReader(URI indexSetReaderHostUri)
// {
// // try
// // {
//
// String strHostUri = indexSetReaderHostUri.toString();
// String strHandlerName = strHostUri.substring(strHostUri.lastIndexOf('/') + 1);
//
//
// // todo *******************************************************************
// Delight delight = new Delight(DelightConfigFinder.getDefaultConfig());
// DynaQService client = delight.connectingTo(strHostUri).usingApi(dynaQServiceName, DynaQService.class);
// //
// // IndexReader remoteIndexReader =
// // XmlRpc.createClientForClass(IndexReader.class, strHandlerName, XmlRpcConnection.create(indexSetReaderHostUri.toURL()));
// //
// //
// // return remoteIndexReader;
//
// return null;
//
// // }
// // catch (MalformedURLException e)
// // {
// // Logger.getLogger(DynaQServiceUtils.class.getName()).log(Level.SEVERE, "Malformed URL", e);
// //
// // return null;
// // }
// }
//
//
//
//
// /**
// * Returns an IndexSearcher client Object for a remote index set. This is for low-level Lucene programming.<br>
// * CAUTION: This convinience method only works with DynaQService Objects created with this interface, because the class remembers the ip address of each Object at
// * creation time. In the case this don't fits your needs, use the according method by specifying the indexSetSearcher host URI.
// *
// * @param dynaQService the (remote) DynaQ Service which gives access to this indexSet
// * @param strIndexSetID an ID to identify the index set
// *
// * @return an IndexSearcher client Object for the given IndexSetID
// */
// static public Searcher getIndexSetSearcher(DynaQService dynaQService, String strIndexSetID)
// {
//
// try
// {
//
// String strSearcherUri = dynaQService.getIndexSetSearcherURI(strIndexSetID);
//
// return getIndexSetSearcher(new URI(strSearcherUri));
//
//
// }
// catch (URISyntaxException e)
// {
// Logger.getLogger(DynaQServiceUtils.class.getName()).log(Level.SEVERE, "", e);
//
// return null;
// }
// }
//
//
//
// /**
// * Returns an IndexSearcher client Object for a remote index set. This is for low-level Lucene programming.<br>
// *
// * @param indexSetSearcherHostUri the URI this service can be accessed
// *
// * @return an IndexSearcher client Object for the given IndexSetID
// */
// static public Searcher getIndexSetSearcher(URI indexSetSearcherHostUri)
// {
// // try
// // {
// String strHostUri = indexSetSearcherHostUri.toString();
// String strHandlerName = strHostUri.substring(strHostUri.lastIndexOf('/') + 1);
//
// // Searcher remoteIndexSearcher =
// // XmlRpc.createClientForClass(Searcher.class, strHandlerName, XmlRpcConnection.create(indexSetSearcherHostUri.toURL()));
//
// // todo *******************************************************************
// Searcher remoteIndexSearcher = null;
//
//
// return remoteIndexSearcher;
//
// //
// // }
// // catch (MalformedURLException e)
// // {
// // Logger.getLogger(DynaQServiceUtils.class.getName()).log(Level.SEVERE, "Malformed URL", e);
// //
// // return null;
// // }
// }
......@@ -326,6 +187,7 @@ public class DynaQServiceUtils
m_iServerPort = DelightServerManager.startServer(iServerPort, bOnlyLocalhost, DynaQDispatcherServlet.class.getName());
//XXX hier können wir noch zusätzliche Handler hinzufügen, z.B. eine Trend-Schnittstelle...
return DelightServerManager.addHandler(m_iServerPort, serviceHandler, dynaQServiceHandlerName, false);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment