/**
 * @class Monk.component.WordCloud
 * @description Shows analysis results as word cloud.
 * @extends Workbench.component.Component
 * @author Stéfan
 */

/**
 * Constructor: forwards the configuration to the superclass constructor.
 *
 * @param {Object} args Configuration passed through unchanged to the superclass.
 */
Monk.component.WordCloud = function(args) {

    Monk.component.WordCloud.superclass.constructor.call(this, args);

};

Workbench.extend(Monk.component.WordCloud, Workbench.component.Component, {

    label: 'Analysis Word Cloud',
    description: 'A simple word cloud visualization of analysis results',
    // NOTE(review): `this` is evaluated once, where this literal is built (not
    // per instance), so this is whatever `window` property the enclosing `this`
    // has at load time - confirm that is intended.
    "window": this.window,

    /**
     * Event dispatcher. Depending on the event type it either requests more
     * data (chunk frequencies, Dunning's log likelihood) or converts the data
     * that came with the event into word cloud items and redraws the cloud.
     *
     * @param {Object} monkEvent Event object; only its instanceOf() is used here.
     * @param {Object|Array} data Event payload; its shape depends on the event type.
     */
    handle : function(monkEvent, data) {
        var wordCloudItems;
        var i;

        if (monkEvent.instanceOf(Monk.event.workset.AnalysisResultsProcessed)) {
            var features = Monk.component.dataManager.getFeatureList();
            if (features.length > 0) {
                wordCloudItems = [];
                for (i = 0; i < features.length; i++) {
                    // Earlier features get a larger weight (1 down towards 0).
                    var weight = 1 - (i / features.length);
                    // redrawWordCloud sizes and ranks items by `raw`, so the
                    // weight has to be exposed under that name as well;
                    // `weight` is kept for backward compatibility.
                    wordCloudItems.push({word: features[i].name, weight: weight, raw: weight});
                }
                // No alphabetical pre-sort: redrawWordCloud always re-sorts by `raw`.
                this.redrawWordCloud(wordCloudItems);
            }
        } else if (monkEvent.instanceOf(Monk.event.chunk.ChunkSelected)) {
            if (data.chunkType != 'collections-root' && data.chunkType != 'collection') {
                document.getElementById('label').innerHTML = "<h2>WordCloud for " + this.escapeHtml(data.label) + "</h2>";
                Monk.data.chunk.retrieveChunkFrequencies([data]);
            }
        } else if (monkEvent.instanceOf(Monk.event.chunk.ChunkChecked)) {
            this.loadWorkset();
        } else if (monkEvent.instanceOf(Monk.event.chunk.ChunkFrequenciesRetrieved)) {
            wordCloudItems = this.convertChunkFrequenciesToWordCloudItems(data.freqs);
            // NOTE(review): `sort` is passed here but redrawWordCloud does not read it.
            this.redrawWordCloud(wordCloudItems, {dampen : true, sort : 'value'});
        } else if (monkEvent.instanceOf(Monk.event.project.WorksetsSelected)) {
            Monk.data.workset.retrieveDunningsLogLikelyHood({worksets: data});
            // NOTE(review): assumes at least two worksets were selected - confirm with caller.
            document.getElementById('label').innerHTML =
                "<h2>Dunnings Log Likelihood WordCloud of lemmas for these worksets:</h2>"
                + "<ul><li class='a'>" + this.escapeHtml(data[0].label) + ' (' + this.escapeHtml(data[0].id) + ")</li>"
                + "<li class='b'>" + this.escapeHtml(data[1].label) + ' (' + this.escapeHtml(data[1].id) + ')</li></ul>';
        } else if (monkEvent.instanceOf(Monk.event.workset.DunningsLogLikelyHoodRetrieved)) {
            var entries = data.response.responseXML.firstChild.childNodes;
            var parenthesized = /\(.*?\)/g;
            var match;
            wordCloudItems = [];
            for (i = 0; i < entries.length; i++) {
                if (entries[i].nodeType == 1 && entries[i].tagName == 'entry') {
                    // NOTE(review): the fixed childNodes indexes below depend on the
                    // exact layout (including whitespace text nodes) of the response XML.
                    var string = entries[i].childNodes[1].childNodes[1].textContent;
                    var stats = entries[i].childNodes[3];

                    // Collect every "(...)" group; the joined string is what
                    // redrawWordCloud matches the part-of-speech filter against.
                    var pos = [];
                    parenthesized.lastIndex = 0;
                    while ((match = parenthesized.exec(string)) != null) {
                        pos.push(match[0]);
                    }

                    wordCloudItems.push({
                        // the word itself, without the "(...)" groups and with whitespace collapsed
                        word : string.replace(/\s+\(.*?\)/g, '').replace(/\s+/g, ' ')
                        ,pos : pos.join(' ')
                        ,raw : parseFloat(stats.childNodes[13].textContent)
                        // 'a' / 'b' match the CSS classes used for the two worksets in the label
                        ,cls : parseFloat(stats.childNodes[3].textContent) < parseFloat(stats.childNodes[7].textContent) ? 'a' : 'b'
                    });
                }
            }
            this.redrawWordCloud(wordCloudItems);
        }

    }

    /**
     * Builds the parts-of-speech filter menu and the viewport (a label area and
     * a cloud area with a toolbar), then loads the current workset.
     */
    ,init : function() {

        // Each item carries the list of part-of-speech tags it stands for;
        // unchecked items have their tags filtered out in redrawWordCloud.
        this.posMenu = new Ext.menu.Menu([
            new Ext.menu.CheckItem({
                text : 'function words'
                ,hideOnClick : false
                ,checked : false
                ,name : 'function'
                ,pos : ['acp','an','av','cc','ch','cj','cop','crq','cs','d','da','dt','jn','nu','pd','pf','pi','pj','pn','po','pp','pq','pr','ptl','pu','px','sy','uh','va','vm','xx','zz']
            })
            ,new Ext.menu.CheckItem({
                text : 'content words'
                ,hideOnClick : false
                ,checked : true
                ,name : 'content'
                ,pos : ['fo','fr','ge','gr','it','j','la','n','v']
            })
            ,new Ext.menu.CheckItem({
                text : 'proper nouns'
                ,hideOnClick : false
                ,checked : false
                ,name : 'proper'
                ,pos : ['jp','ng','np']
            })

        ]);
        this.posMenu.addListener('itemclick', function(){
            var me = this;
            // NOTE(review): presumably delayed so the clicked item's `checked`
            // state has been toggled before the filter is rebuilt - confirm.
            setTimeout(function(){
                // Reuse the config of the last draw so options such as `dampen` survive re-filtering.
                me.redrawWordCloud(me.cachedCloudData, me.cachedCloudConfig);
            }, 100);
        }, this);

        new Ext.Viewport({
            items : [
                {
                    layout : 'fit'
                    ,items : [
                        {
                            id : 'label'
                            ,autoHeight : true
                        }
                        ,{
                            id : 'cloud'
                            // ,autoHeight : true
                            ,height : 500
                        }
                    ]
                    ,tbar : [
                        {
                            text : 'Parts of speech'
                            ,tooltip : 'Use this menu to filter parts of speech from the word cloud.'
                            ,menu : this.posMenu
                        }
                    ]
                }
            ]
        });
        this.loadWorkset();
    }

    /**
     * Requests chunk frequencies for every chunk id in the current workset's
     * comma-separated `workList` and updates the label. Does nothing when
     * there is no workset or the list contains no ids.
     */
    ,loadWorkset: function() {
        var workset = Monk.component.dataManager.getWorkset();
        if (!workset || !workset.workList) {return;}

        var ids = String(workset.workList).split(',');
        var chunks = [];
        for (var i = 0; i < ids.length; i++) {
            // ''.split(',') yields [''], so empty ids have to be skipped explicitly.
            if (ids[i] !== '') {chunks.push({id : ids[i]});}
        }
        if (chunks.length == 0) {return;}

        document.getElementById('label').innerHTML = 'Word cloud for workset ' + this.escapeHtml(Monk.component.dataManager.getWorksetLabel());
        Monk.data.chunk.retrieveChunkFrequencies(chunks);
    }

    /**
     * Converts chunk frequency records into word cloud items.
     *
     * @param {Array} freqs Records with `string` and `count` properties.
     * @return {Array} Items of the form {word: string, raw: count}; empty when freqs is missing.
     */
    ,convertChunkFrequenciesToWordCloudItems : function(freqs) {
        var wordCloudItems = [];
        if (!freqs) {return wordCloudItems;}
        for (var i = 0; i < freqs.length; i++) {
            wordCloudItems.push({
                word: freqs[i].string,
                raw: freqs[i].count
            });
        }
        return wordCloudItems;
    },

    /**
     * Renders the items through the Wordle applet instead of HTML spans.
     *
     * @param {Array} cloudData Items with `word` and `raw` properties.
     * @param {Object} config Optional: `height`, `width` (default: viewport size
     *        minus 50), `renderTo` (element id, default "cloud"), `archive`
     *        (default 'wordle.jar').
     */
    redrawWordleCloud : function(cloudData, config) {
        if (!config) {config = {};}

        // The applet takes a comma-separated list of "word:count" pairs.
        var wordCounts = [];
        for (var i = 0; i < cloudData.length; i++) {
            wordCounts.push(cloudData[i].word + ":" + parseInt(cloudData[i].raw * 10, 10));
        }

        var viewportHeight = window.innerHeight ? window.innerHeight
            : (document.documentElement ? document.documentElement.clientHeight
            : (document.body ? document.body.clientHeight : 600));
        var viewportWidth = window.innerWidth ? window.innerWidth
            : (document.documentElement ? document.documentElement.clientWidth
            : (document.body ? document.body.clientWidth : 400));
        var appletHeight = config.height ? config.height : (viewportHeight - 50);
        var appletWidth = config.width ? config.width : (viewportWidth - 50);

        document.getElementById(config.renderTo ? config.renderTo : "cloud").innerHTML =
            '<applet name="wordle" mayscript="mayscript" code="wordle/WordleApplet.class" archive="'
            + (config.archive ? config.archive : 'wordle.jar')
            + '" width="' + appletWidth + '" height="' + appletHeight + '">'
            + '<param name="wordcounts" value="' + this.escapeHtml(wordCounts.join(",")) + '"/></applet>';

    },

    /**
     * Renders the items as HTML spans whose font size reflects their value.
     * The unfiltered input and the config are cached so that the
     * parts-of-speech menu can redraw with a different filter. The caller's
     * array is not modified (the items themselves receive a `val` property).
     *
     * @param {Array} cloudData Items with `word` and `raw`, optionally `pos`
     *        (string matched against the unchecked menu items' tags) and `cls`
     *        (CSS class of the span). Nothing happens when empty or missing.
     * @param {Object} config Optional: `dampen` (size by the fourth root of
     *        `raw`), `limit` (max number of words, default 150), `renderTo`
     *        (element id, default "cloud").
     */
    redrawWordCloud : function(cloudData, config) {

        if (!config) {config = {};}

        if (!cloudData || cloudData.length == 0) {return;}

        // Cache a copy of the unfiltered input together with its config.
        this.cachedCloudData = cloudData.slice(0);
        this.cachedCloudConfig = config;

        // Collect the part-of-speech tags of every unchecked menu item.
        var filterPosArray = [];
        if (this.posMenu) {
            this.posMenu.items.each(function(item) {
                if (!item.checked) {
                    for (var j = 0; j < item.pos.length; j++) {filterPosArray.push(item.pos[j]);}
                }
            }, this);
        }

        // Work on a private array so sorting below never reorders the caller's data.
        var items = [];
        var i;
        if (filterPosArray.length > 0) {
            var filterPosPattern = new RegExp("\\b(" + filterPosArray.join('|') + ")\\b");
            for (i = 0; i < cloudData.length; i++) {
                // Items without `pos` are never filtered out.
                if (cloudData[i].pos && filterPosPattern.test(cloudData[i].pos)) {continue;}
                items.push(cloudData[i]);
            }
        } else {
            items = cloudData.slice(0);
        }

        var min = 0;
        var range = 0;
        if (items.length > 0) {

            // transform
            for (i = 0; i < items.length; i++) {
                items[i].val = config.dampen ? Math.sqrt(Math.sqrt(items[i].raw)) : items[i].raw;
            }

            // largest first, then keep only the top entries
            items.sort(function(a, b){
                return b.raw - a.raw;
            });
            items = items.slice(0, config.limit ? config.limit : 150);

            min = items[items.length - 1].val;
            range = items[0].val - min;
        }

        // build output: sizes run from 1pt (smallest value) to 76pt (largest value)
        var cloudString = "";
        for (i = 0; i < items.length; i++) {
            var size;
            if (range > 0) {
                size = Math.floor((items[i].val - min) * 75 / range) + 1;
            } else {
                // All displayed values are equal (e.g. a single word): scaling
                // would divide by zero, so use a mid-range size instead.
                size = 38;
            }
            if (isNaN(size)) {size = 1;}

            var word = this.escapeHtml(items[i].word);
            cloudString += '<span class="' + this.escapeHtml(items[i].cls ? items[i].cls : '')
                + '" style="font-size: ' + size + 'pt" title="' + word + ': ' + this.escapeHtml(items[i].raw) + '">'
                + word + "</span> ";
        }
        document.getElementById(config.renderTo ? config.renderTo : "cloud").innerHTML = cloudString;

    }

    /**
     * Escapes a value for safe inclusion in HTML text or a quoted attribute.
     *
     * @param {Mixed} value Converted to a string before escaping.
     * @return {String} The string with &, <, >, " and ' replaced by entities.
     */
    ,escapeHtml : function(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    // Unfiltered items of the last draw; replaced per instance on every redrawWordCloud call.
    ,cachedCloudData : []
    // Config of the last draw, reused when the parts-of-speech filter changes.
    ,cachedCloudConfig : null
});