Commit 5b838a12 authored by reuschling's avatar reuschling
Browse files

Bugfix: added an additional null check, necessary for the new JSON parser, in the
Aloe service interface. Also added service methods to get buzzwords directly
from a document in the index.


git-svn-id: https://dynaq.opendfki.de/repos/application/trunk@1049 8b50620d-ddf8-0310-9f5d-8b2f31fd9ac4
parent f11e16a1
......@@ -76,7 +76,7 @@ public class DynaQServiceHandlerAloeEventObserver extends DynaQServer implements
for (int i = 0; i < arrayValues.size(); i++)
{
String strValue = ValueUtilz.asString(arrayValues.get(i).getAsString());
String strValue = ValueUtilz.asString(arrayValues.get(i).isJsonNull() ? null : arrayValues.get(i).getAsString());
llAttNames.add(key2value.getKey());
llAttValues.add(strValue);
......@@ -84,7 +84,8 @@ public class DynaQServiceHandlerAloeEventObserver extends DynaQServer implements
}
else
{
String strValue = ValueUtilz.asString(key2value.getValue().getAsString());
String strValue = ValueUtilz.asString(key2value.getValue().isJsonNull() ? null : key2value.getValue().getAsString());
llAttNames.add(key2value.getKey());
llAttValues.add(strValue);
......@@ -140,7 +141,7 @@ public class DynaQServiceHandlerAloeEventObserver extends DynaQServer implements
flattenToAttValuePairs(jsData, llAttNames, llAttValues);
String strID = ValueUtilz.asString(jsData.get(strIDAttName).getAsString());
String strID = ValueUtilz.asString(jsData.get(strIDAttName).isJsonNull() ? null : jsData.get(strIDAttName).getAsString());
if(strID == null) throw new IllegalStateException("there is no resourceId");
Logger.getLogger(this.getClass().getName()).log(Level.INFO, "Will add metadata to document " + strID);
......@@ -159,7 +160,7 @@ public class DynaQServiceHandlerAloeEventObserver extends DynaQServer implements
flattenToAttValuePairs(jsData, llAttNames, llAttValues);
// fetch the doc to see which attributes it contains
String strID = ValueUtilz.asString(jsData.get(strIDAttName).getAsString());
String strID = ValueUtilz.asString(jsData.get(strIDAttName).isJsonNull() ? null : jsData.get(strIDAttName).getAsString());
if(strID == null) throw new IllegalStateException("there is no resourceId");
Logger.getLogger(this.getClass().getName()).log(Level.INFO, "Will change metadata of document " + strID);
......@@ -187,7 +188,7 @@ public class DynaQServiceHandlerAloeEventObserver extends DynaQServer implements
else if(AloeEventObserver.EVENT_TYPE_DELETED_RESOURCE.equals(strEventType))
{
String strID = ValueUtilz.asString(jsData.get(strIDAttName).getAsString());
String strID = ValueUtilz.asString(jsData.get(strIDAttName).isJsonNull() ? null : jsData.get(strIDAttName).getAsString());
if(strID == null) throw new IllegalStateException("there is no resourceId");
Logger.getLogger(this.getClass().getName()).log(Level.INFO, "Will delete document " + strID);
......@@ -201,7 +202,7 @@ public class DynaQServiceHandlerAloeEventObserver extends DynaQServer implements
flattenToAttValuePairs(jsData, llAttNames, llAttValues);
String strURL = ValueUtilz.asString(jsData.get("aloe_resourceUri").getAsString());
String strURL = ValueUtilz.asString(jsData.get("aloe_resourceUri").isJsonNull() ? null : jsData.get("aloe_resourceUri").getAsString());
if(strURL == null) throw new IllegalStateException("there is no resourceUri");
Logger.getLogger(this.getClass().getName()).log(Level.INFO, "Will crawl and index website " + strURL);
......@@ -221,8 +222,11 @@ public class DynaQServiceHandlerAloeEventObserver extends DynaQServer implements
flattenToAttValuePairs(jsData, llAttNames, llAttValues);
if(jsData.get("aloe_fileName").isJsonNull()) throw new IllegalStateException("there is no fileName");
String strFileName = ValueUtilz.asString(jsData.get("aloe_fileName").getAsString());
if(strFileName == null) throw new IllegalStateException("there is no fileName");
Logger.getLogger(this.getClass().getName()).log(Level.INFO, "Will index file " + strFileName);
String strBase64ContentAttName = "aloe_base64EncodedResourceContent";
......@@ -241,6 +245,7 @@ public class DynaQServiceHandlerAloeEventObserver extends DynaQServer implements
flattenToAttValuePairs(jsData, llAttNames, llAttValues);
if(jsData.get(strIDAttName).isJsonNull()) throw new IllegalStateException("there is no resourceId");
String strID = ValueUtilz.asString(jsData.get(strIDAttName).getAsString());
if(strID == null) throw new IllegalStateException("there is no resourceId");
Logger.getLogger(this.getClass().getName()).log(Level.INFO, "Will remove document metadata from " + strID);
......@@ -297,7 +302,7 @@ public class DynaQServiceHandlerAloeEventObserver extends DynaQServer implements
else
{
// we have an identifier
hsIDAttNames2Values.add(key2value.getKey(), ValueUtilz.asString(key2value.getValue().getAsString()));
hsIDAttNames2Values.add(key2value.getKey(), ValueUtilz.asString(key2value.getValue().isJsonNull() ? null : key2value.getValue().getAsString()));
}
}
......
......@@ -48,8 +48,8 @@ import de.dfki.inquisition.lucene.Buzzwords;
import de.dfki.inquisition.lucene.IndexAccessor;
import de.dfki.inquisition.lucene.RemoteIndexReader;
import de.dfki.inquisition.lucene.RemoteIndexSearcher;
import de.dfki.inquisition.lucene.RemoteMultiIndexSearcher;
import de.dfki.inquisition.net.DelightServerManager;
import de.dfki.km.leech.metadata.LeechMetadata;
......@@ -106,13 +106,17 @@ public class DynaQServer implements DynaQService, Runnable
protected HashMap<String, RemoteIndexReader> m_hsIndexSetID2IndexReaderHandler = new HashMap<String, RemoteIndexReader>();
private URI m_serverUri;
protected HashMap<String, RemoteIndexReader> m_hsIndexSetID2IndexReaderHandler = new HashMap<String, RemoteIndexReader>();
protected HashMap<String, String> m_hsIndexSetID2IndexReaderUri = new HashMap<String, String>();
protected HashMap<String, RemoteIndexSearcher> m_hsIndexSetID2IndexSearcherHandler = new HashMap<String, RemoteIndexSearcher>();
......@@ -120,12 +124,8 @@ public class DynaQServer implements DynaQService, Runnable
protected HashMap<String, String> m_hsIndexSetID2IndexSearcherUri = new HashMap<String, String>();
protected HashMap<String, LuceneIndexSet> m_hsIndexSetID2LuceneIndexSet = new HashMap<String, LuceneIndexSet>();
private URI m_serverUri;
protected int m_servicePort = -1;
......@@ -581,46 +581,68 @@ public class DynaQServer implements DynaQService, Runnable
@Documentation(hide = true)
private void deployReaderWriterServices(String indexSetId, LuceneIndexSet luceneIndexSet) throws Exception
public boolean enabled4RemoteAccess(String indexSetId)
{
// a ReaderService and a SearcherService are each started here...the port is the same as for our
// parent service, so this is only about the handler
RemoteIndexReader readerHandler = IndexAccessor.getMultiIndexReader(luceneIndexSet.getIndexPaths());
return m_hsIndexSetID2IndexReaderUri.containsKey(indexSetId);
}
// this should not normally be the case, but during development it is handy if the server gets started here
//TODO it has no path here because the default servlet has no corresponding annotation. Is this necessary? Commenting it out for now
// if(!DelightServerManager.isServerUpAndRunning(m_servicePort)) DelightServerManager.startServer( m_servicePort);
URI readerUri = DelightServerManager.addHandler(m_servicePort, readerHandler, indexSetId + "_reader", false);
m_hsIndexSetID2IndexReaderHandler.put(indexSetId, readerHandler);
m_hsIndexSetID2IndexReaderUri.put(indexSetId, readerUri.toString());
RemoteIndexSearcher indexSearcherHandler = IndexAccessor.getMultiIndexSearcher(luceneIndexSet.getIndexPaths());
URI searcherUri = DelightServerManager.addHandler(m_servicePort, indexSearcherHandler, indexSetId + "_searcher", false);
m_hsIndexSetID2IndexSearcherHandler.put(indexSetId, indexSearcherHandler);
m_hsIndexSetID2IndexSearcherUri.put(indexSetId, searcherUri.toString());
/**
 * Returns the buzzword terms for the given text, calculated on the
 * {@code AttributeConfig.IndexAttributes.BODY} attribute of the given index set.
 * <p>
 * The scoring is delegated to {@code Buzzwords.getBuzzwordsWithTfIdf}; only the keys of the
 * returned map are handed back, in the map's iteration order.
 *
 * @param text the text passed to the buzzword calculation
 * @param maxNumberOfBuzzwords passed through to {@code Buzzwords.getBuzzwordsWithTfIdf}
 * @param skipSimilarTerms passed through to {@code Buzzwords.getBuzzwordsWithTfIdf}
 * @param indexSetId the ID of the index set whose index paths are used
 * @return the buzzword terms as a list
 * @throws DynaQException if no index set is registered under {@code indexSetId}
 * @throws Exception whatever the underlying buzzword calculation propagates
 */
@Override
public List<String> getBuzzwords4IndexCorpus(String text, int maxNumberOfBuzzwords, boolean skipSimilarTerms, String indexSetId) throws Exception
{
    final LuceneIndexSet indexSet = m_hsIndexSetID2LuceneIndexSet.get(indexSetId);
    if(indexSet == null)
    {
        throw new DynaQException("Index set '" + indexSetId + "' not known by DynaQService");
    }

    final LinkedHashMap<String, Float> term2Score = Buzzwords.getBuzzwordsWithTfIdf(text, AttributeConfig.IndexAttributes.BODY, maxNumberOfBuzzwords,
            skipSimilarTerms, indexSet.getIndexPaths());

    // only the terms are returned; the scores are dropped
    return new LinkedList<String>(term2Score.keySet());
}
@Documentation(hide = true)
public boolean enabled4RemoteAccess(String indexSetId)
@Override
public List<String> getBuzzwords4IndexCorpus(String text, String attName4BuzzwordCalculation, int maxNumberOfBuzzwords, boolean skipSimilarTerms, String indexSetId)
throws Exception
{
return m_hsIndexSetID2IndexReaderUri.containsKey(indexSetId);
LuceneIndexSet luceneIndexSet = m_hsIndexSetID2LuceneIndexSet.get(indexSetId);
if(luceneIndexSet == null) throw new DynaQException("Index set '" + indexSetId + "' not known by DynaQService");
LinkedHashMap<String, Float> buzzwordsWithTfIdf =
Buzzwords.getBuzzwordsWithTfIdf(text, attName4BuzzwordCalculation, maxNumberOfBuzzwords, skipSimilarTerms, luceneIndexSet.getIndexPaths());
LinkedList<String> llBuzzwords = new LinkedList<String>();
for (Entry<String, Float> buzzEntry : buzzwordsWithTfIdf.entrySet())
llBuzzwords.add(buzzEntry.getKey());
return llBuzzwords;
}
@Override
public List<String> getBuzzwords4IndexCorpus(String text, int maxNumberOfBuzzwords, boolean skipSimilarTerms, String indexSetId) throws Exception
public List<String> getBuzzwords4IndexCorpusDoc(String docUId, Set<String> attNames4BuzzwordCalculation, int maxNumberOfBuzzwords, boolean skipSimilarTerms,
String indexSetId) throws Exception
{
LuceneIndexSet luceneIndexSet = m_hsIndexSetID2LuceneIndexSet.get(indexSetId);
if(luceneIndexSet == null) throw new DynaQException("Index set '" + indexSetId + "' not known by DynaQService");
LinkedHashMap<String, Float> buzzwordsWithTfIdf =
Buzzwords.getBuzzwordsWithTfIdf(text, AttributeConfig.IndexAttributes.BODY, maxNumberOfBuzzwords, skipSimilarTerms, luceneIndexSet.getIndexPaths());
Buzzwords.getBuzzwordsWithTfIdf(docUId, attNames4BuzzwordCalculation, maxNumberOfBuzzwords, skipSimilarTerms, luceneIndexSet.getIndexPaths());
LinkedList<String> llBuzzwords = new LinkedList<String>();
......@@ -633,15 +655,19 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public List<String> getBuzzwords4IndexCorpus(String text, String attName4BuzzwordCalculation, int maxNumberOfBuzzwords, boolean skipSimilarTerms, String indexSetId)
throws Exception
public List<String> getBuzzwords4IndexCorpusDoc(String docIdAttName, String docId, Set<String> attNames4BuzzwordCalculation, int maxNumberOfBuzzwords,
boolean skipSimilarTerms, String indexSetId) throws Exception
{
LuceneIndexSet luceneIndexSet = m_hsIndexSetID2LuceneIndexSet.get(indexSetId);
if(luceneIndexSet == null) throw new DynaQException("Index set '" + indexSetId + "' not known by DynaQService");
List<String> lUids = this.getDocumentAttributeValues(docIdAttName, docId, LeechMetadata.id, indexSetId);
if(lUids.isEmpty()) throw new IllegalStateException("document has no leech UID");
String docUId = lUids.iterator().next();
LinkedHashMap<String, Float> buzzwordsWithTfIdf =
Buzzwords.getBuzzwordsWithTfIdf(text, attName4BuzzwordCalculation, maxNumberOfBuzzwords, skipSimilarTerms, luceneIndexSet.getIndexPaths());
Buzzwords.getBuzzwordsWithTfIdf(docUId, attNames4BuzzwordCalculation, maxNumberOfBuzzwords, skipSimilarTerms, luceneIndexSet.getIndexPaths());
LinkedList<String> llBuzzwords = new LinkedList<String>();
......@@ -653,6 +679,15 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public LinkedHashMap<String, Float> getBuzzwordsWithTfIdf4IndexCorpus(String text, int maxNumberOfBuzzwords, boolean skipSimilarTerms, String indexSetId)
throws Exception
......@@ -670,6 +705,7 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public LinkedHashMap<String, Float> getBuzzwordsWithTfIdf4IndexCorpus(String text, String attName4BuzzwordCalculation, int maxNumberOfBuzzwords,
boolean skipSimilarTerms, String indexSetId) throws Exception
......@@ -687,6 +723,45 @@ public class DynaQServer implements DynaQService, Runnable
/**
 * Returns the buzzwords with their scores for a document addressed by its UID.
 * <p>
 * Resolves the index set registered under {@code indexSetId} and delegates to
 * {@code Buzzwords.getBuzzwordsWithTfIdf} with that index set's index paths.
 *
 * @param docUId the document UID passed to the buzzword calculation
 * @param attNames4BuzzwordCalculation the attribute names passed to the buzzword calculation
 * @param maxNumberOfBuzzwords passed through to {@code Buzzwords.getBuzzwordsWithTfIdf}
 * @param skipSimilarTerms passed through to {@code Buzzwords.getBuzzwordsWithTfIdf}
 * @param indexSetId the ID of the index set whose index paths are used
 * @return the map returned by {@code Buzzwords.getBuzzwordsWithTfIdf}
 * @throws DynaQException if no index set is registered under {@code indexSetId}
 * @throws Exception whatever the underlying buzzword calculation propagates
 */
@Override
public LinkedHashMap<String, Float> getBuzzwordsWithTfIdf4IndexCorpusDoc(String docUId, Set<String> attNames4BuzzwordCalculation, int maxNumberOfBuzzwords,
        boolean skipSimilarTerms, String indexSetId) throws Exception
{
    final LuceneIndexSet indexSet = m_hsIndexSetID2LuceneIndexSet.get(indexSetId);
    if(indexSet == null)
    {
        throw new DynaQException("Index set '" + indexSetId + "' not known by DynaQService");
    }

    return Buzzwords.getBuzzwordsWithTfIdf(docUId, attNames4BuzzwordCalculation, maxNumberOfBuzzwords, skipSimilarTerms, indexSet.getIndexPaths());
}
/**
 * Returns the buzzwords with their scores for a document addressed by an arbitrary
 * ID attribute.
 * <p>
 * The value of the {@code LeechMetadata.id} attribute is looked up via
 * {@code getDocumentAttributeValues}; the first value found is used as the document UID for
 * {@code Buzzwords.getBuzzwordsWithTfIdf}.
 *
 * @param docIdAttName the name of the attribute that holds {@code docId}
 * @param docId the document ID value to look up
 * @param attNames4BuzzwordCalculation the attribute names passed to the buzzword calculation
 * @param maxNumberOfBuzzwords passed through to {@code Buzzwords.getBuzzwordsWithTfIdf}
 * @param skipSimilarTerms passed through to {@code Buzzwords.getBuzzwordsWithTfIdf}
 * @param indexSetId the ID of the index set whose index paths are used
 * @return the map returned by {@code Buzzwords.getBuzzwordsWithTfIdf}
 * @throws DynaQException if no index set is registered under {@code indexSetId}
 * @throws IllegalStateException if the lookup yields no UID value for the document
 * @throws Exception whatever the lookup or the buzzword calculation propagates
 */
@Override
public LinkedHashMap<String, Float> getBuzzwordsWithTfIdf4IndexCorpusDoc(String docIdAttName, String docId, Set<String> attNames4BuzzwordCalculation,
        int maxNumberOfBuzzwords, boolean skipSimilarTerms, String indexSetId) throws Exception
{
    final LuceneIndexSet indexSet = m_hsIndexSetID2LuceneIndexSet.get(indexSetId);
    if(indexSet == null)
    {
        throw new DynaQException("Index set '" + indexSetId + "' not known by DynaQService");
    }

    // resolve the UID of the document identified by (docIdAttName, docId)
    final List<String> uidCandidates = this.getDocumentAttributeValues(docIdAttName, docId, LeechMetadata.id, indexSetId);
    if(uidCandidates.isEmpty())
    {
        throw new IllegalStateException("document has no leech UID");
    }
    final String resolvedUid = uidCandidates.iterator().next();

    return Buzzwords.getBuzzwordsWithTfIdf(resolvedUid, attNames4BuzzwordCalculation, maxNumberOfBuzzwords, skipSimilarTerms, indexSet.getIndexPaths());
}
@Override
public Map<String, Collection<String>> getDocument(String docUId, String indexSetId) throws Exception
{
......@@ -700,7 +775,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public Map<String, Collection<String>> getDocument(String docIdAttName, String docId, String indexSetId) throws Exception
{
......@@ -715,10 +789,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public Collection<String> getDocumentAttributeValues(String docUId, String attName, String indexSetId) throws Exception
{
......@@ -750,7 +820,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public List<Map<String, Collection<String>>> getDocuments(String docIdAttName, String docId, String indexSetId) throws Exception
{
......@@ -773,6 +842,7 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public List<Map<String, Collection<String>>> getDocuments(String[] docUIds, String indexSetId) throws Exception
{
......@@ -795,6 +865,19 @@ public class DynaQServer implements DynaQService, Runnable
/**
 * Returns the indexing queue size reported by a {@code SimpleIndexInterface} created for
 * the given index set.
 *
 * @param indexSetId the ID of the index set to query
 * @return the value of {@code SimpleIndexInterface.getIndexingQueueSize()}
 * @throws DynaQException if no index set is registered under {@code indexSetId}
 * @throws Exception whatever the index interface propagates
 */
@Override
public int getIndexingQueueSize(String indexSetId) throws Exception
{
    final LuceneIndexSet indexSet = m_hsIndexSetID2LuceneIndexSet.get(indexSetId);
    if(indexSet == null)
    {
        throw new DynaQException("Index set '" + indexSetId + "' not known by DynaQService");
    }

    return new SimpleIndexInterface(indexSet).getIndexingQueueSize();
}
@Documentation(hide = true)
public LuceneIndexSet getIndexSet(String indexSetId)
{
......@@ -803,6 +886,7 @@ public class DynaQServer implements DynaQService, Runnable
@Documentation(hide = true)
@Override
public String getIndexSetDynaQServiceURL(String indexSetURL)
......@@ -819,6 +903,7 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public Collection<String> getIndexSetIds()
{
......@@ -827,6 +912,10 @@ public class DynaQServer implements DynaQService, Runnable
@Documentation(hide = true)
@Override
public String getIndexSetReaderURL(String indexSetId)
......@@ -836,8 +925,6 @@ public class DynaQServer implements DynaQService, Runnable
@Documentation(hide = true)
@Override
public String getIndexSetSearcherURL(String indexSetId)
......@@ -847,6 +934,9 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public Set<String> getMatchingQueryTerms(String queryString, List<String> atts4Search, String indexSetId) throws Exception
{
......@@ -861,7 +951,6 @@ public class DynaQServer implements DynaQService, Runnable
/**
* Gets the URI this server is available from
*
......@@ -875,6 +964,9 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public SearchResult getSimilarDocs(String text, Map<String, Float> simAtts2Boost, String indexSetId) throws Exception
{
......@@ -883,6 +975,8 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public SearchResult getSimilarDocs(String text, Map<String, Float> simAtts2Boost, String indexSetId4Search, String indexSetId4TfIdfCalculation) throws Exception
{
......@@ -920,6 +1014,8 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public SearchResult getSimilarDocs(String text, Map<String, Float> simAtts2Boost, String attName4SearchAndBuzzwordCalculation, String indexSetId4Search,
String indexSetId4TfIdfCalculation) throws Exception
......@@ -957,7 +1053,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public List<ScoredDocument> getSimilarDocs(String docIdAttName, String docId4QueryDoc, Map<String, Float> simAtts2Boost,
Map<String, String> queryDocAttName4ContextDocId2contextDocAttName4Id, String resultDocsIdAttName, String indexSetId) throws Exception
......@@ -974,9 +1069,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public List<ScoredDocument> getSimilarDocs(String docIdAttName, String docId, Map<String, Float> simAtts2Boost, String resultDocsIDAttName, String indexSetId)
throws Exception
......@@ -996,6 +1088,7 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public LinkedHashMap<DocID, MetadataQueueEntry> getWaitingDocumentAppends(String indexSetId) throws Exception
{
......@@ -1011,8 +1104,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
synchronized public void index(InputStream fileAsStream, String fileName, Map<String, Collection<String>> attNames2Values, String indexSetId) throws Exception
{
......@@ -1028,7 +1119,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
synchronized public void index(String url, int crawlingDepth, Map<String, Collection<String>> attNames2Values, String indexSetId) throws Exception
{
......@@ -1044,9 +1134,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public void indexDontBlock(InputStream fileAsStream, String fileName, Map<String, Collection<String>> attNames2Values, String indexSetId) throws Exception
{
......@@ -1062,8 +1149,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public void indexDontBlock(String url, int crawlingDepth, Map<String, Collection<String>> attNames2Values, String indexSetId) throws Exception
{
......@@ -1080,9 +1165,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public void insertDocument(Map<String, Collection<String>> attNames2Values, String indexSetId) throws Exception
{
......@@ -1164,7 +1246,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public void removeIndexSet(String indexSetId)
{
......@@ -1193,7 +1274,6 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public boolean replaceDocumentAttributes(String docUId, Map<String, Collection<String>> attNames2Values, Set<String> not2replaceAtts, String indexSetId)
throws Exception
......@@ -1300,7 +1380,6 @@ public class DynaQServer implements DynaQService, Runnable
@Documentation(hide = true)
public void shutDown()
{
......@@ -1323,15 +1402,25 @@ public class DynaQServer implements DynaQService, Runnable
@Override
public int getIndexingQueueSize(String indexSetId) throws Exception
@Documentation(hide = true)
private void deployReaderWriterServices(String indexSetId, LuceneIndexSet luceneIndexSet) throws Exception
{
LuceneIndexSet luceneIndexSet = m_hsIndexSetID2LuceneIndexSet.get(indexSetId);
if(luceneIndexSet == null) throw new DynaQException("Index set '" + indexSetId + "' not known by DynaQService");
// a ReaderService and a SearcherService are each started here...the port is the same as for our
// parent service, so this is only about the handler
RemoteIndexReader readerHandler = IndexAccessor.getMultiIndexReader(luceneIndexSet.getIndexPaths());
SimpleIndexInterface indexInterface = new SimpleIndexInterface(luceneIndexSet);
return indexInterface.getIndexingQueueSize();
// this should not normally be the case, but during development it is handy if the server gets started here
//XXX it has no path here because the default servlet has no corresponding annotation. Is this necessary? Commenting it out for now
// if(!DelightServerManager.isServerUpAndRunning(m_servicePort)) DelightServerManager.startServer( m_servicePort);
URI readerUri = DelightServerManager.addHandler(m_servicePort, readerHandler, indexSetId + "_reader", false);
m_hsIndexSetID2IndexReaderHandler.put(indexSetId, readerHandler);
m_hsIndexSetID2IndexReaderUri.put(indexSetId, readerUri.toString());
RemoteIndexSearcher indexSearcherHandler = IndexAccessor.getMultiIndexSearcher(luceneIndexSet.getIndexPaths());
URI searcherUri = DelightServerManager.addHandler(m_servicePort, indexSearcherHandler, indexSetId + "_searcher", false);
m_hsIndexSetID2IndexSearcherHandler.put(indexSetId, indexSearcherHandler);
m_hsIndexSetID2IndexSearcherUri.put(indexSetId, searcherUri.toString());
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment