Commit 57a1dfbd authored by Christian Reuschling's avatar Christian Reuschling
Browse files

now with better topic contextualization

parent 8c9162ba
......@@ -5,7 +5,7 @@
<groupId>dfki.sds.dynaq</groupId>
<artifactId>dynaq</artifactId>
<packaging>jar</packaging>
<version>2.6-SNAPSHOT</version>
<version>2.7-SNAPSHOT</version>
<name>dynaq</name>
<url>http://dynaq.opendfki.de</url>
......@@ -22,7 +22,6 @@
<properties>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<!-- <j2seVersion>1.7</j2seVersion> -->
<maven.compiler.encoding>UTF-8</maven.compiler.encoding>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
......
......@@ -225,7 +225,7 @@ public class DynaQQuery implements Serializable
protected boolean m_bEmptyQueryMatchesAll = false;
boolean m_bTermRelevanceExplanations = false;
public boolean m_bTermRelevanceExplanations = false;
protected ColorFactory m_colorFactory = new ColorFactory();
......@@ -318,10 +318,6 @@ public class DynaQQuery implements Serializable
* @param alInitialSelectedAtts the initial selected attributes that should be searched
* @param luceneIndexSet the luceneIndexSet that should be used for index lookup
*
* @throws ParseException
* @throws IOException
* @throws DynaQException
* @throws URISyntaxException
*/
public DynaQQuery(List<IdentifiableQueryString> userSubQueryStrings, BooleanClause contextQuery, float fContextQueryWeight, List<String> alInitialSelectedAtts,
LuceneIndexSet luceneIndexSet) throws ParseException, IOException, DynaQException, URISyntaxException
......@@ -351,10 +347,6 @@ public class DynaQQuery implements Serializable
* @param alInitialSelectedAtts the initial selected attributes that should be searched
* @param luceneIndexSet the luceneIndexSet that should be used for index lookup
*
* @throws ParseException
* @throws IOException
* @throws DynaQException
* @throws URISyntaxException
*/
public DynaQQuery(List<IdentifiableQueryString> userSubQueryStrings, List<BooleanClause> contextQueries, float fContextQueryWeight, List<String> alInitialSelectedAtts,
LuceneIndexSet luceneIndexSet) throws ParseException, IOException, DynaQException, URISyntaxException
......@@ -476,10 +468,6 @@ public class DynaQQuery implements Serializable
* @param fContextQueryWeight the weight of the context query
* @param alInitialSelectedAtts the initial selected attributes that should be searched
*
* @throws ParseException
* @throws IOException
* @throws DynaQException
* @throws URISyntaxException
*/
public DynaQQuery(String strUserQueryString, BooleanClause contextQuery, float fContextQueryWeight, List<String> alInitialSelectedAtts)
throws ParseException, IOException, DynaQException, URISyntaxException
......@@ -591,9 +579,8 @@ public class DynaQQuery implements Serializable
* Performs the search and gets the result according to the query represented by this Object. This method 'eats' all the time necessary for searching.
*
* @return the result according to the query represented by this Object
* @throws Exception
*/
public DynaQResultList calculateResult() throws Exception
public DynaQResultList calculateResult()
{
m_lastCalculatedResultList = new DynaQResultList(this, m_luceneIndexSet, m_bTermRelevanceExplanations);
......@@ -608,7 +595,6 @@ public class DynaQQuery implements Serializable
* @param iTopDocs2Collect the maximum count of documents that will be collected inside the result list
*
* @return the result according to the query represented by this Object
* @throws Exception
*/
public DynaQResultList calculateResult(int iTopDocs2Collect) throws Exception
{
......
......@@ -15,7 +15,6 @@ import org.dynaq.util.lucene.basic.IndexAccessor;
import org.dynaq.util.lucene.basic.RemoteIndexSearcher;
import javax.mail.MethodNotSupportedException;
import java.io.IOException;
import java.util.*;
import java.util.logging.Logger;
......@@ -35,7 +34,7 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
*
* @author Christian Reuschling, Dipl.Ing.(BA)
*/
public class DocScoreIterator implements Iterator<DocSimilarityValue>, Iterable<DocSimilarityValue>
@SuppressWarnings("RedundantIfStatement") public class DocScoreIterator implements Iterator<DocSimilarityValue>, Iterable<DocSimilarityValue>
{
private int m_iIndex = -1;
......@@ -98,7 +97,7 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
*
* @author Christian Reuschling, Dipl.Ing.(BA)
*/
public class ScoredDynaQDocIterator implements Iterator<ScoredDynaQDocument>, Iterable<ScoredDynaQDocument>
@SuppressWarnings("RedundantIfStatement") public class ScoredDynaQDocIterator implements Iterator<ScoredDynaQDocument>, Iterable<ScoredDynaQDocument>
{
......@@ -232,7 +231,7 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
return 1.0f - (1.0f / (1.0f + fOriginLuceneScore));
}
public boolean m_bTermRelevanceExplanations;
protected DynaQQuery m_dynaQQuery;
......@@ -244,8 +243,6 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
protected TopDocs m_luceneTopDocs = null;
public boolean termRelevanceExplanations = true;
/**
......@@ -253,12 +250,11 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
* recalculateFromQuery()
*
* @param dynaQQuery the query for searching
*
* @throws Exception
*/
public DynaQResultList(DynaQQuery dynaQQuery) throws Exception
{
// no index set was handed over, so the service default index set is used
m_luceneIndexSet = LuceneIndexSet.getServiceDefaultIndexSet();
// take over the term relevance explanation flag that is configured at the query object
m_bTermRelevanceExplanations = dynaQQuery.m_bTermRelevanceExplanations;
// the remaining initialisation is delegated to init(..)
this.init(dynaQQuery);
}
......@@ -271,12 +267,11 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
*
* @param dynaQQuery the query for searching
* @param iTopDocs2Collect the maximum count of documents that will be collected inside the result list
*
* @throws Exception
*/
public DynaQResultList(DynaQQuery dynaQQuery, int iTopDocs2Collect) throws Exception
{
m_luceneIndexSet = LuceneIndexSet.getServiceDefaultIndexSet();
m_bTermRelevanceExplanations = dynaQQuery.m_bTermRelevanceExplanations;
this.setTopDocs2CollectCount(iTopDocs2Collect);
......@@ -291,12 +286,11 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
*
* @param dynaQQuery the query for searching
* @param luceneIndexSet a KafkaRCP luceneService Plugin for accessing the Lucene index
*
* @throws Exception
*/
public DynaQResultList(DynaQQuery dynaQQuery, LuceneIndexSet luceneIndexSet) throws Exception
{
// use the index set given by the caller for the index lookup
m_luceneIndexSet = luceneIndexSet;
// take over the term relevance explanation flag that is configured at the query object
m_bTermRelevanceExplanations = dynaQQuery.m_bTermRelevanceExplanations;
// the remaining initialisation is delegated to init(..)
this.init(dynaQQuery);
}
......@@ -310,13 +304,11 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
* @param dynaQQuery the query for searching
* @param luceneIndexSet a KafkaRCP luceneService Plugin for accessing the Lucene index
* @param bTermRelevanceExplanations true: term relevance explanation collecting enabled (weaved stuff), false otherwise
*
* @throws Exception
*/
public DynaQResultList(DynaQQuery dynaQQuery, LuceneIndexSet luceneIndexSet, boolean bTermRelevanceExplanations) throws Exception
public DynaQResultList(DynaQQuery dynaQQuery, LuceneIndexSet luceneIndexSet, boolean bTermRelevanceExplanations)
{
m_luceneIndexSet = luceneIndexSet;
termRelevanceExplanations = bTermRelevanceExplanations;
m_bTermRelevanceExplanations = bTermRelevanceExplanations;
this.init(dynaQQuery);
}
......@@ -330,12 +322,11 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
* @param dynaQQuery the query for searching
* @param luceneIndexSet a KafkaRCP luceneService Plugin for accessing the Lucene index
* @param iTopDocs2Collect the maximum count of documents that will be collected inside the result list
*
* @throws Exception
*/
public DynaQResultList(DynaQQuery dynaQQuery, LuceneIndexSet luceneIndexSet, int iTopDocs2Collect) throws Exception
{
m_luceneIndexSet = luceneIndexSet;
m_bTermRelevanceExplanations = dynaQQuery.m_bTermRelevanceExplanations;
this.setTopDocs2CollectCount(iTopDocs2Collect);
......@@ -352,13 +343,11 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
* @param luceneIndexSet a KafkaRCP luceneService Plugin for accessing the Lucene index
* @param iTopDocs2Collect the maximum count of documents that will be collected inside the result list
* @param bTermRelevanceExplanations true: term relevance explanation collecting enabled (weaved stuff), false otherwise
*
* @throws Exception
*/
public DynaQResultList(DynaQQuery dynaQQuery, LuceneIndexSet luceneIndexSet, int iTopDocs2Collect, boolean bTermRelevanceExplanations) throws Exception
{
m_luceneIndexSet = luceneIndexSet;
termRelevanceExplanations = bTermRelevanceExplanations;
m_bTermRelevanceExplanations = bTermRelevanceExplanations;
this.setTopDocs2CollectCount(iTopDocs2Collect);
......@@ -375,7 +364,6 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
* @param iDocPositionInResultList the position of the document inside the result list.
*
* @return the ScoredDynaQDocument Object out of the result list
* @throws Exception
*/
public ScoredDynaQDocument createScoredDoc(int iDocPositionInResultList) throws Exception
{
......@@ -417,9 +405,8 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
* @param iDocPositionInResultList the position of the document inside the result list.
*
* @return the score Object for the result list document
* @throws IOException
*/
public DocSimilarityValue getDocScore(int iDocPositionInResultList) throws IOException
public DocSimilarityValue getDocScore(int iDocPositionInResultList)
{
DocSimilarityValue simValue = new DocSimilarityValue();
......@@ -437,7 +424,7 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
simValue.fOverallSim = DynaQResultList.normalizeLuceneScore(simValue.fOverallSim);
TermRelevancesScoreDoc termRelevancesScoreDoc = (TermRelevancesScoreDoc) this.m_luceneTopDocs.scoreDocs[iDocPositionInResultList];
TermRelevancesScoreDoc termRelevancesScoreDoc = this.m_luceneTopDocs.scoreDocs[iDocPositionInResultList];
if(m_dynaQQuery.getLuceneQuery4UserString() != null)
......@@ -519,48 +506,6 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
// /**
// * Determines for each user given query term the maximum score inside the result list. For this, the collected ExplanationValues Objects of the
// * DocScores are iterated. The method deals with the text values of the query terms, merging several field occurences.<br>
// * Remark: The method gets the DIRECT subQueries of the user given query. The returned values are the overall scores of these subQueries mapped
// to
// * all extracted term values inside these subQueries. Further, the scores are multiplicated with the coordination factor of the
// document-specific
// * explanationValues Object. This scenario is appropriate for dealing with BooleanQueries generated by a QueryParser. This might be not enough
// in
// * the case you have generated more complex queries.
// *
// * @return a mapping 'user given query term value => maximum result list score'
// *
// * @throws Exception
// */
// public HashMap<String, Float> getMaxTermScores() throws Exception
// {
// HashMap<String, Float> hsTerm2Maximum = new HashMap<String, Float>();
//
// if(m_dynaQQuery.getLuceneQuery4UserString() == null) return hsTerm2Maximum;
//
// // wir holen uns die collected scores aller dokumente
// for (DocSimilarityValue docSim : this.getResultDocScores())
// {
// // kucken jeweils in die SubQueries rein
// for (Entry<String, Float> term2Relevance : docSim.subTerms2SubScoreRelevance.entrySet())
// {
// // die Terme / der Term der SubQuery. Bei PhraseQueries sind evtl. mehrere Terme drin
// String strConcatenatedTerms = term2Relevance.getKey();
// // der Anteil dieser Terme am OverallScore - da ist der coodination factor schon mit dabei
// Float fRelevance4Term = term2Relevance.getValue();
//
// // wenn der alte Score für diesen Term kleiner war als der aktuelle subQueryScore, haben wir ein neues Maximum
// Float fOldMaxima4Term = hsTerm2Maximum.get(strConcatenatedTerms);
// if(fOldMaxima4Term == null)
// hsTerm2Maximum.put(strConcatenatedTerms, fRelevance4Term);
// else if(fOldMaxima4Term < fRelevance4Term) hsTerm2Maximum.put(strConcatenatedTerms, fRelevance4Term);
// }
// }
//
// return hsTerm2Maximum;
// }
......@@ -581,7 +526,6 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
* case you don't need the content of the documents itself and their scores will be enough: This is the method of your choice.
*
* @return an Iterable over the result list document scores - the order is the result list one (sorted according the overallScore)
* @throws Exception
*/
public Iterable<DocSimilarityValue> getResultDocScores() throws Exception
{
......@@ -590,63 +534,19 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
// falls es nach der Kalkulation immer noch keine Hits gibt, geben wir einen leeren Iterator zurück
if(m_luceneTopDocs == null)
return new Iterable<DocSimilarityValue>()
{
@Override
public Iterator<DocSimilarityValue> iterator()
{
return new ArrayList<DocSimilarityValue>().iterator();
}
};
return Collections::emptyIterator;
return new DocScoreIterator();
}
// /**
// * Returns the maximum similarity value between the query context and a result document
// *
// * @return the maximum similarity value between the query context and a result document
// */
// public float getMaxContextSimilarity()
// {
// return m_fMaxContextSimilarity;
// }
// /**
// * Returns the maximum merged similarity value between the query and a result document. This is the weighted sum out of the context and the user
// * given query.
// *
// * @return the maximum merged similarity value between the query and a result document
// */
// public float getMaxMergedSimilarity()
// {
// return m_fMaxMergedSimilarity;
// }
// /**
// * Returns the maximum similarity value between the user given query and a result document
// *
// * @return the maximum similarity value between the user given query and a result document
// */
// public float getMaxUserQuerySimilaity()
// {
// return m_fMaxQuerySimilaity;
// }
/**
* Returns an Iterable over the result list. It is generally not a good idea to iterate over all documents inside the result list, because the creation of a single
* Document object needs a lookup into the persistent index - so be careful because of performance issues.
*
* @return an Iterable over the result list
* @throws Exception
*/
public Iterable<ScoredDynaQDocument> getResultDocs() throws Exception
{
......@@ -745,10 +645,8 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
* Performs all stuff necessary for Object initialisation.
*
* @param dynaQQuery the query object for searching
*
* @throws Exception
*/
protected void init(DynaQQuery dynaQQuery) throws Exception
protected void init(DynaQQuery dynaQQuery)
{
m_dynaQQuery = dynaQQuery;
......@@ -786,10 +684,8 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
/**
* Recalculates the result list according to the given query. In the case something has changed inside the original, given query (e.g. a re-weighting of the user query
* terms), this is a convenience method that can be invoked instead of creating a new ResultList Object. The sorting criteria will be the standard dynaq sorting
*
* @throws Exception
*/
public void recalculateFromQuery() throws Exception
public void recalculateFromQuery()
{
this.recalculateFromQuery(null, false);
}
......@@ -804,10 +700,9 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
* @param strFieldName4Sorting a field name for the field that will be used for sorting. After sorting against the specified fieldname, the standard dynaq sorting
* will be performed. This value can be null, in this case only the DynaQ standard sorting is enabled
* @param bReverse specifies whether the sorting against the specified fieldname will be in ascending or descending order
*
* @throws Exception
*/
public void recalculateFromQuery(String strFieldName4Sorting, boolean bReverse) throws Exception
@SuppressWarnings("CommentedOutCode")
public void recalculateFromQuery(String strFieldName4Sorting, boolean bReverse)
{
// Vorgehensweise:
......@@ -839,7 +734,7 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
// wir schalten das injizierte Flag für die Termrelevanzen scharf
TermRelevancesCollector.setTermRelevanceExplanations(query4All, termRelevanceExplanations);
TermRelevancesCollector.setTermRelevanceExplanations(query4All, m_bTermRelevanceExplanations);
......@@ -861,13 +756,13 @@ public class DynaQResultList implements Iterable<ScoredDynaQDocument>
/**
* Enables or disables the term relevance explanations for the next recalculateFromQuery(..). Disable it in the case you don't need term relevance explanations, for
* Enables or disables the term relevance explanations for succeeding recalculateFromQuery(..) invocations. Disable it in the case you don't need term relevance explanations, for
* performance reasons. Currently only the DynaQ result view needs term relevance explanations. In the case you created this DynaQResultList Object with disabled
* explanations, you don't have to disable it again.
*/
public void setTermRelevanceExplanations(boolean bTermRelevanceExplanations)
{
termRelevanceExplanations = bTermRelevanceExplanations;
m_bTermRelevanceExplanations = bTermRelevanceExplanations;
}
......
......@@ -558,7 +558,7 @@ public class DynaQDocumentPool extends JPanel implements ActionListener, RCPPers
if (dynaQResultList.getHitCount() > 300) dynaQResultList.setTopDocs2CollectCount(dynaQResultList.getHitCount() + 1);
dynaQResultList.termRelevanceExplanations = false;
dynaQResultList.m_bTermRelevanceExplanations = false;
dynaQResultList.recalculateFromQuery();
......
......@@ -10,6 +10,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.regex.Pattern;
......@@ -74,8 +75,12 @@ class ClusterDocument implements Document
for (String strBlackListTerm : m_lBlackListTerms)
{
strText = strText.replace(strBlackListTerm.trim(), "");
strText = strText.replace(strBlackListTerm.trim().toLowerCase(), "");
// Alt: wir eliminieren aggressiv - das führt teilweise zu verhackten Clusterlabels
// strText = strText.replace(strBlackListTerm.trim(), "");
// strText = strText.replace(strBlackListTerm.trim().toLowerCase(), "");
strText = strText.replaceAll("(^|\\W+)" + Pattern.quote(strBlackListTerm.trim()) + "(\\W+|$)", " ");
strText = strText.replaceAll("(^|\\W+)" + Pattern.quote(strBlackListTerm.trim().toLowerCase()) + "(\\W+|$)", " ");
// strText = strText.replaceAll("(^|\\W+)" + "bla bla(" + "\\W+|$)", " ");
}
return strText;
......
......@@ -17,6 +17,7 @@ import org.carrot2.language.LoadedLanguages;
import org.carrot2.math.matrix.FactorizationQuality;
import org.carrot2.math.matrix.NonnegativeMatrixFactorizationEDFactory;
import org.dynaq.core.DynaQDocument;
import org.dynaq.core.DynaQQuery;
import org.dynaq.core.DynaQResultList;
import org.dynaq.documents.DynaQDocList;
import org.dynaq.documents.DynaQDocumentPool;
......@@ -52,6 +53,7 @@ import java.awt.event.ActionListener;
import java.awt.event.MouseEvent;
import java.io.File;
import java.io.IOException;
import java.rmi.server.UID;
import java.util.List;
import java.util.*;
import java.util.logging.Level;
......@@ -280,7 +282,8 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
JFrame frame = new JFrame("GraphStream");
frame.setLayout(new BorderLayout());
frame.add(clusterMapPanel, BorderLayout.CENTER);
frame.setSize(1800, 1400);
// frame.setSize(1800, 1400);
frame.pack();
frame.setVisible(true);
frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
}
......@@ -293,6 +296,8 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
protected JButton m_cluster2BlackListButton = new JButton("To blacklist");
protected JButton m_clusterButton = new JButton("Cluster");
protected SpinnerModel m_clusterCountSpinnerModel = new SpinnerNumberModel(7, 2, 42, 5);
private TitledBorder m_clusterPaneBorder = BorderFactory.createTitledBorder("Document clusters (0)");
......@@ -301,16 +306,20 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
private ColorFactory m_colorFactory = new ColorFactory();
protected DynaQResultList m_currentDynaQResultList;
private JScrollPane m_docListScrollPane;
private DynaQDocList m_documentList;
protected JButton m_getAllFromResultListButton = new JButton("Cluster results");
protected int m_iCurrentlySelectedDocCount = 0;
protected int m_iMaxDocs2Load;
protected JButton m_loadBlackListButton = new JButton("Load");
protected JButton m_loadResultlistButton = new JButton("Load result list");
// der InternalFrame, in welchen die KafkaRCP das Panel eingebettet hat
protected JInternalFrame m_myFrame;
......@@ -322,13 +331,6 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
protected JCheckBox m_titlesCheckBox = new JCheckBox("Titles");
// protected Stream<ClusterDocument> dynaQDocs2clusterDocs(DynaQResultList dynaQResultList)
// {
// Stream<ScoredDynaQDocument> dynaQDocStream = StreamSupport.stream(dynaQResultList.spliterator(), false);
//
// return dynaQDocStream.map(ClusterDocument::new);
// }
public DynaQClusterMapPanel()
......@@ -353,15 +355,28 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
String strCommand = event.getActionCommand();
if(strCommand.equals("getAllFromResultListButton"))
if(strCommand.equals("loadResultlist"))
{
this.getAllFromResultList();
m_currentDynaQResultList = this.getAllFromResultList();
if(m_currentDynaQResultList != null)
clusterAndShowResultList(m_currentDynaQResultList, -1);
if(m_myFrame != null)
m_myFrame.setTitle("Clustering for current result list");
}
if(strCommand.equals("cluster2BlackListButton"))
else if(strCommand.equals("clusterButton"))
{
m_blackListTextArea.append("\n" + m_selectedClusterLabelsTextField.getText());
clusterAndShowResultList(m_currentDynaQResultList, -1);
}
if(strCommand.equals("loadBlackListButton"))
else if(strCommand.equals("cluster2BlackListButton"))
{
if(StringUtils.nullOrWhitespace(m_blackListTextArea.getText()))
m_blackListTextArea.append(m_selectedClusterLabelsTextField.getText());
else
m_blackListTextArea.append("\n" + m_selectedClusterLabelsTextField.getText());
}
else if(strCommand.equals("loadBlackListButton"))
{
FileDialog dialog = new FileDialog((Frame) null, "Load blacklist text file");
dialog.setMode(FileDialog.LOAD);
......@@ -370,7 +385,7 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
if(dialog.getFile() != null)
m_blackListTextArea.setText(FileUtilz.file2String(dialog.getDirectory() + '/' + dialog.getFile()));
}
if(strCommand.equals("saveBlackListButton"))
else if(strCommand.equals("saveBlackListButton"))
{
FileDialog dialog = new FileDialog((Frame) null, "Save blacklist text file");
dialog.setMode(FileDialog.SAVE);
......@@ -466,6 +481,7 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
@SuppressWarnings("ConstantConditions")
public int ask4RightDocCount2Cluster(int iWholeDocumentCount)
{
int iMaxDocs2Load = 500;
......@@ -545,7 +561,6 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
// https://carrot2.github.io/release/4.0.4/doc/tuning-performance/
// https://carrot2.github.io/release/4.0.4/doc/tuning-quality/
// das sind die Empfehlungen für Lingo von der carrot website
algorithm.preprocessing.wordDfThreshold
.set((int) Math.max(1, docCount2Cluster * 0.005)); // default min1 max100 default1. Ich hatte 2, das war gut für kleine Dokumentenmengen ~500docs.
......@@ -612,6 +627,40 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
/**
 * Clusters the documents of the given result list and shows the outcome inside this panel. The given result list and the document limit are remembered inside
 * the member fields. The result list is recalculated with disabled term relevance explanations before its documents are collected and handed over to
 * clusterAndShow(..).
 *
 * @param dynaQResultList the result list whose documents should be clustered. In the case it is null (e.g. the 'Cluster' action was triggered before any
 *                        result list was loaded), a message dialog is shown and nothing else happens
 * @param iMaxDocs2Load   the maximum count of documents that should be clustered. A value &lt;= 0 means that the count is determined with
 *                        ask4RightDocCount2Cluster(..)
 */
public void clusterAndShowResultList(DynaQResultList dynaQResultList, int iMaxDocs2Load)
{
    // the caller possibly has no result list yet - without this guard we would run into a NullPointerException below.
    // The formerly remembered state stays untouched in this case
    if(dynaQResultList == null)
    {
        JOptionPane.showMessageDialog(this, "No result documents found.");
        return;
    }

    m_currentDynaQResultList = dynaQResultList;
    m_iMaxDocs2Load = iMaxDocs2Load;

    if(dynaQResultList.getHitCount() == 0)
    {
        JOptionPane.showMessageDialog(this, "No result documents found.");
        return;
    }

    // no usable limit given: determine it, based on the hit count of the result list
    if(m_iMaxDocs2Load <= 0)
        m_iMaxDocs2Load = ask4RightDocCount2Cluster(dynaQResultList.getHitCount() + 1);

    // still no usable limit - presumably the dialog was cancelled (TODO confirm against ask4RightDocCount2Cluster), so there is nothing to cluster
    if(m_iMaxDocs2Load < 1)
        return;

    // NOTE(review): 300 looks like the collect count the result list works with by default - verify against DynaQResultList
    if(m_iMaxDocs2Load > 300)
        dynaQResultList.setTopDocs2CollectCount(m_iMaxDocs2Load);

    // important for our performance
    dynaQResultList.setTermRelevanceExplanations(false);
    dynaQResultList.recalculateFromQuery();

    List<DynaQDocument> lResultDocs = StreamSupport.stream(dynaQResultList.spliterator(), false).collect(Collectors.toList());

    clusterAndShow(lResultDocs);
}
@NotNull
private List<Cluster<ClusterDocument>> clusterExampleDocs() throws IOException
{
......@@ -648,30 +697,13 @@ public class DynaQClusterMapPanel extends JPanel implements ActionListener, RCPP
LinkedList<ClusterNode> lClusterNodes = new LinkedList<>();