API/Samples/Groovy/Writer/MostPopularWords
From Apache OpenOffice Wiki
About the MostPopularWords Example
This example shows how to get the text of the whole document or of its selected parts, and how to display some statistics (specifically, the most popular words) for this text in an info message box from a Groovy macro.
The Code
/*
 * Some OpenOffice.org API classes. For more information on these classes and
 * the OpenOffice.org API, see the OpenOffice.org Developers Guide at:
 * https://www.openoffice.org/api/
 */
import com.sun.star.uno.UnoRuntime
import com.sun.star.uno.XComponentContext
import com.sun.star.container.XIndexAccess
import com.sun.star.container.XNameContainer
import com.sun.star.beans.XPropertySet
import com.sun.star.frame.XDesktop
import com.sun.star.frame.XModel
import com.sun.star.awt.XWindow
import com.sun.star.awt.XWindowPeer
import com.sun.star.awt.XMessageBoxFactory
import com.sun.star.awt.XMessageBox
import com.sun.star.awt.Rectangle
import com.sun.star.lang.XComponent
import com.sun.star.lang.XMultiComponentFactory
import com.sun.star.lang.XMultiServiceFactory
import com.sun.star.view.XSelectionSupplier
import com.sun.star.text.XTextDocument
import com.sun.star.text.XTextRange
import com.sun.star.text.XText

// The number of words to display as a result
final int NW = 5

// These words should be ignored when searching for most popular words
final List<String> ignoreList = ['a', 'an', 'the', 'in', 'for', 'to', 'at', 'of', 'by', 'and', 'or', 'nor', 'whether', 'as', 'with', 'on', 'through', 'when']

/**
 * Groovy category that adds shorthand syntax to UNO objects inside a
 * use (UnoCategory) { ... } block:
 *   obj.uno(XFoo)      -> UnoRuntime.queryInterface(XFoo, obj)
 *   pset['Name']       -> pset.getPropertyValue('Name')
 *   pset['Name'] = v   -> pset.setPropertyValue('Name', v)
 *   ndx[i]             -> ndx.getByIndex(i)
 */
class UnoCategory {
    // Queries unoObj for the UNO interface given by clazz.
    public static Object uno(Object unoObj, Class clazz) {
        UnoRuntime.queryInterface(clazz, unoObj)
    }

    // Subscript read on a property set: returns the value of property pname.
    public static Object getAt(XPropertySet pset, String pname) {
        pset.getPropertyValue(pname)
    }

    // Subscript write on a property set: assigns newValue to property pname.
    public static void putAt(XPropertySet pset, String pname, Object newValue) {
        pset.setPropertyValue(pname, newValue)
    }

    // Subscript read on an indexed container: returns the element at index x.
    public static Object getAt(XIndexAccess ndx, int x) {
        ndx.getByIndex(x)
    }
}

/**
 * The algorithmic part of this macro.
 *
 * Splits text into words (runs matching \w+), lower-cases them, drops the
 * ones contained in ignoreList and counts the occurrences of the rest.
 *
 * @param text       the text to analyse; null or empty yields an empty result
 * @param ignoreList lower-case words that must not be counted
 * @param count      maximum number of words to return; values <= 0 yield an
 *                   empty result
 * @return at most count words, always ordered from the most to the least
 *         frequent
 */
List<String> getMostPopularWords(String text, List<String> ignoreList, int count) {
    // Nothing to analyse or nothing requested. Without this guard a count of
    // 0 would turn the old slice -1..-count into -1..0, i.e. the whole list.
    if (!text || count <= 0) {
        return []
    }

    // Declared locally so the helpers do not leak into the script binding.
    def freqMap = [:]
    (text =~ /\w+/).each { word ->
        String lcWord = word.toLowerCase()
        if (!ignoreList.contains(lcWord)) {
            freqMap[lcWord] = (freqMap[lcWord] ?: 0) + 1
        }
    }

    def wordList = freqMap.keySet().toList()
    // Ascending sort followed by a reversal: for count < size this yields
    // exactly what the former wordList[-1..-count] slice produced.
    wordList.sort { freqMap[it] }
    def ranked = wordList.reverse()

    // Same descending order whether or not fewer than count words exist.
    int n = Math.min(count, ranked.size())
    return new ArrayList<String>(ranked.subList(0, n))
}

/**
 * This function is used to get the text of the whole document or of its
 * selected parts.
 *
 * With more than one selection the selected strings are concatenated,
 * separated by single spaces. With one non-empty selection that selection is
 * returned. Otherwise (empty selection, or a selection that cannot be
 * accessed as an indexed container of text ranges) the text of the whole
 * document is returned.
 *
 * NOTE(review): assumes XSCRIPTCONTEXT.document is a text document; the
 * XTextDocument query is not null-checked, as in the original macro.
 */
String getText() {
    String text = ""
    use (UnoCategory) {
        def oDoc = XSCRIPTCONTEXT.document
        def xText = oDoc.uno(XTextDocument).text
        def oModel = oDoc.uno(XModel)
        def oSelectionSupplier = oModel.currentController.uno(XSelectionSupplier)

        // queryInterface yields null when the selection does not offer
        // XIndexAccess; treat that like "nothing selected".
        def selection = oSelectionSupplier.selection
        def oIndexAccess = (selection == null) ? null : selection.uno(XIndexAccess)
        def count = (oIndexAccess == null) ? 0 : oIndexAccess.count

        if (count > 1) {
            // More than one selection. The order of concatenation does not
            // matter for the word statistics; the original backward
            // iteration is kept.
            for (i in (count - 1)..0) {
                // get the XTextRange of the selection
                def oTextRange = oIndexAccess[i].uno(XTextRange)
                if (oTextRange != null) {
                    text += ' ' + oTextRange.string
                }
            }
        } else if (count == 1) {
            def oTextRange = oIndexAccess[0].uno(XTextRange)
            def selected = (oTextRange == null) ? null : oTextRange.string
            // Empty string means no real selection: use the whole document.
            text = selected ?: xText.string
        } else {
            // No accessible selection at all: use the whole document.
            text = xText.string
        }
    }
    return text
}

/**
 * Getting parent window for message box.
 *
 * Creates the com.sun.star.frame.Desktop service through the service manager
 * of xContext and returns the container window of the frame that belongs to
 * the desktop's current component.
 *
 * @param xContext the component context used to create the Desktop service
 * @return the container window of the current component's frame
 */
XWindow getParentWindow(xContext) {
    use (UnoCategory) {
        XMultiComponentFactory xMCF = xContext.serviceManager
        Object desktopService = xMCF.createInstanceWithContext("com.sun.star.frame.Desktop", xContext)
        XDesktop xDesktop = desktopService.uno(XDesktop)
        XComponent currentComponent = xDesktop.currentComponent
        XModel model = currentComponent.uno(XModel)
        return model.currentController.frame.containerWindow
    }
}

/**
 * Function to display info message box.
 *
 * @param title the caption of the message box
 * @param text  the message shown inside the box
 */
def showInfoMsg(title, text) {
    use (UnoCategory) {
        XWindow pWnd = getParentWindow(XSCRIPTCONTEXT.componentContext)
        XWindowPeer pWndPeer = pWnd.uno(XWindowPeer)
        // The toolkit of the parent window peer doubles as message box factory.
        XMessageBoxFactory msgBoxFactory = pWndPeer.toolkit.uno(XMessageBoxFactory)
        XMessageBox msgBox = msgBoxFactory.createMessageBox(pWndPeer, new Rectangle(), "infobox", 0, title, text)
        msgBox.execute()
    }
}

// Macro entry point: show the NW most popular words of the current text.
showInfoMsg('The most popular words', getMostPopularWords(getText(), ignoreList, NW).join(' '))