1 /**
  2  * @class Monk.component.WordCloud
 * @description Shows analysis results as a word cloud.
  4  * @extends Workbench.component.Component
  5  * @author Stéfan
  6  */
  7 
  8 Monk.component.WordCloud = function(args) {
  9     
 10     Monk.component.WordCloud.superclass.constructor.call(this, args);
 11     
 12 }
 13 
 14 Workbench.extend(Monk.component.WordCloud, Workbench.component.Component, {
 15    
    // Display name and one-line summary of this component
    // (presumably consumed by the Workbench framework - confirm where they are shown).
    label: 'Analysis Word Cloud',
    description: 'A simple word cloud visualization of analysis results',
    // NOTE(review): an object literal does not rebind `this`, so `this.window` is
    // resolved once, in the scope where Workbench.extend(...) is called - presumably
    // the global object, which makes this the browser window. Confirm.
    "window": this.window,
 19     
 20     handle : function(monkEvent, data) {
 21 		if (monkEvent.instanceOf(Monk.event.workset.AnalysisResultsProcessed)) {
 22             var features = Monk.component.dataManager.getFeatureList();
 23             if (features.length > 0) {
 24 				var wordCloudItems = []
 25 				for (var i=0; i<features.length;i++) {
 26 					wordCloudItems.push({word: features[i].name, weight: (1-(i/features.length))})
 27 				}
 28 				wordCloudItems.sort(function (a,b) {return a.word>b.word ? 1 : a.word<b.word ? -1 : 0});
 29 				this.redrawWordCloud(wordCloudItems);
 30             }
 31         } else if (monkEvent.instanceOf(Monk.event.chunk.ChunkSelected)) {
 32 			if (data.chunkType != 'collections-root' && data.chunkType != 'collection') {
 33 				document.getElementById('label').innerHTML="<h2>WordCloud for "+data.label
 34 	            Monk.data.chunk.retrieveChunkFrequencies([data]);			
 35 			}
 36         } else if (monkEvent.instanceOf(Monk.event.chunk.ChunkChecked)) {
 37 			this.loadWorkset();
 38         } else if (monkEvent.instanceOf(Monk.event.chunk.ChunkFrequenciesRetrieved)) {
 39 			var wordCloudItems = this.convertChunkFrequenciesToWordCloudItems(data.freqs);
 40 			this.redrawWordCloud(wordCloudItems, {dampen : true, sort : 'value'});
 41         } else if (monkEvent.instanceOf(Monk.event.project.WorksetsSelected)) {
 42 			Monk.data.workset.retrieveDunningsLogLikelyHood({worksets: data})
 43 			document.getElementById('label').innerHTML="<h2>Dunnings Log Likelihood WordCloud of lemmas for these worksets:</h2><ul><li class='a'>"+data[0].label+' ('+data[0].id+")<li><li class='b'>"+data[1].label+' ('+data[1].id+')<li></ul>'
 44         } else if (monkEvent.instanceOf(Monk.event.workset.DunningsLogLikelyHoodRetrieved)) {
 45 			var entries = data.response.responseXML.firstChild.childNodes;
 46 			var wordCloudItems = [];
 47 			var re = /\(.*?\)/g;
 48 			for (var i=0;i<entries.length;i++) {
 49 				if (entries[i].nodeType==1 && entries[i].tagName=='entry') {
 50 					var string = entries[i].childNodes[1].childNodes[1].textContent;
 51 					var pos = new Array();
 52 					while ((match = re.exec(string)) != null) {
 53 						pos.push(match[0]);
 54 					}
 55 					pos = pos.join(' ');
 56 //					if (pos && pos.match(/(dt|pf|cc|pp|acp|pn|ph|vm|d|va|da|cs|crq|xx|av)/)) {continue;}
 57 					wordCloudItems.push({
 58 						word : string.replace(/\s+\(.*?\)/g,'').replace(/\s+/g,' ')
 59 						,pos : pos
 60 						,raw : parseFloat(entries[i].childNodes[3].childNodes[13].textContent)
 61 						,cls : parseFloat(entries[i].childNodes[3].childNodes[3].textContent) < parseFloat(entries[i].childNodes[3].childNodes[7].textContent) ? 'a' : 'b'
 62 					})
 63 				}
 64 			}
 65 			this.redrawWordCloud(wordCloudItems);
 66         }
 67 		
 68     }
 69 	
 70 	,init : function() {
 71 		
 72 		this.posMenu = new Ext.menu.Menu([
 73 			new Ext.menu.CheckItem({
 74 				text : 'function words'
 75 				,hideOnClick : false
 76 				,checked : false
 77 				,name : 'function'
 78 				,pos : ['acp','an','av','cc','ch','cj','cop','crq','cs','d','da','dt','jn','nu','pd','pf','pi','pj','pn','po','pp','pq','pr','ptl','pu','px','sy','uh','va','vm','xx','zz']
 79 			})
 80 			,new Ext.menu.CheckItem({
 81 				text : 'content words'
 82 				,hideOnClick : false
 83 				,checked : true
 84 				,name : 'content'
 85 				,pos : ['fo','fr','ge','gr','it','j','la','n','v']
 86 			})
 87 			,new Ext.menu.CheckItem({
 88 				text : 'proper nouns'
 89 				,hideOnClick : false
 90 				,checked : false
 91 				,name : 'proper'
 92 				,pos : ['jp','ng','np']
 93 			})
 94 		
 95 		]);
 96 		this.posMenu.addListener('itemclick', function(){
 97 			var me = this;
 98 			setTimeout(function(){
 99 				me.redrawWordCloud(me.cachedCloudData)
100 			}, 100)
101 		}, this)
102 
103 		new Ext.Viewport({
104 			items : [
105 				{
106 					layout : 'fit'
107 					,items : [
108 						{
109 							id : 'label'
110 							,autoHeight : true
111 						}
112 						,{
113 							id : 'cloud'
114 //							,autoHeight : true
115 							,height : 500
116 						}
117 					]
118 					,tbar : [
119 						{
120 							text : 'Parts of speech'
121 							,tooltip : 'Use this menu to filter parts of speech from the word cloud.'
122 							,menu : this.posMenu
123 						}
124 					]
125 				}
126 			]
127 		})
128 		this.loadWorkset()	
129 	}
130 	
131 	,loadWorkset: function() {
132 		var chunksArray = Monk.component.dataManager.getWorkset().workList.split(',');
133 		if (chunksArray == null || chunksArray.length==0) {return}
134 		var chunks = new Array();
135 		for (var i=0;i<chunksArray.length;i++) {chunks[i]= {id : chunksArray[i]}}
136 		document.getElementById('label').innerHTML='Word cloud for workset '+Monk.component.dataManager.getWorksetLabel()
137         Monk.data.chunk.retrieveChunkFrequencies(chunks);
138 	}
139 	,convertChunkFrequenciesToWordCloudItems : function(freqs) {
140 		var wordCloudItems = [];
141 		var freq;
142 		for (var i=0;i<freqs.length;i++) {
143 			freq = freqs[i];
144 //			if (freq.pos && freq.pos.match(/(dt|pf|cc|pp|acp|pn|ph|vm|d|va|da|cs|crq|xx|av)/) || freqs[i].string.match(/\bsay\b/)) {continue;}
145 			wordCloudItems.push({
146 				word: freqs[i].string,
147 				raw: freqs[i].count
148 			});
149 		}
150 		return wordCloudItems;
151 	},
152 	
153 	redrawWordleCloud : function(cloudData, config) {
154 		if (!config) {config={}}
155 		cloudStringArray = new Array();
156 		for (var i=0; i<cloudData.length; i++) {
157 			cloudStringArray.push(cloudData[i].word+":"+parseInt(cloudData[i].raw*10));
158 		}
159 		var appletHeight = config.height ? config.height : ((window.innerHeight ? window.innerHeight : (document.documentElement ? document.documentElement.clientHeight : (document.body ? document.body.clientHeight : 600)))-50);
160 		var appletWidth = config.width ? config.width : ((window.innerWidth ? window.innerWidth : (document.documentElement ? document.documentElement.clientWidth : (document.body ? document.body.clientWidth : 400)))-50);
161 		document.getElementById(config.renderTo ? config.renderTo : "cloud").innerHTML = '<applet name="wordle" mayscript="mayscript" code="wordle/WordleApplet.class" archive="'+(config.archive ? config.archive : 'wordle.jar')+'" width="'+appletWidth+'" height="'+appletHeight+'"><param name="wordcounts" value="'+cloudStringArray.join(",")+'"/></applet>'
162 		
163 	},
164 	redrawWordCloud : function(cloudData, config) {
165 
166 		if (!config) {config={}}
167 		
168 		if (cloudData.length==0) return
169 
170 		// clone
171 		this.cachedCloudData = [];
172 		for (var i=0;i<cloudData.length;i++) {this.cachedCloudData.push(cloudData[i])}
173 		
174 		// filter items
175 		var filterPosArray = [];
176 		this.posMenu.items.each(function(item) {
177 			if (!item.checked) {
178 				for (var i=0;i<item.pos.length;i++) filterPosArray.push(item.pos[i])
179 			}
180 		}, this)
181 		if (filterPosArray.length>0) {
182 			var filteredItems = [];
183 			var filterPosPattern = filterPosArray.length==0 ? /./ : new RegExp("\\b("+filterPosArray.join('|')+")\\b");
184 			for (var i = 0; i < cloudData.length; i++) {
185 				if (cloudData[i].pos && cloudData[i].pos.match(filterPosPattern)) continue;
186 				filteredItems.push(cloudData[i]);
187 			}
188 			cloudData=filteredItems;			
189 		}
190 		
191 		if (cloudData.length > 0) {
192 		
193 			// determine max and transform
194 			var val;
195 			for (var i = 0; i < cloudData.length; i++) {
196 				val = config.dampen ? Math.sqrt(Math.sqrt(cloudData[i].raw)) : cloudData[i].raw;
197 				cloudData[i].val = val;
198 			}
199 			
200 			cloudData.sort(function(a, b){
201 				return b.raw - a.raw
202 			})
203 			
204 			cloudData = cloudData.splice(0, config.limit ? config.limit : 150);
205 			
206 			var min = cloudData[cloudData.length - 1].val
207 			var max = cloudData[0].val - min;
208 		}
209 		
210 		// build output
211 		var cloudString = "";
212 		for (var i=0; i<cloudData.length; i++) {
213 			cloudString += '<span class="'+cloudData[i].cls+'" style="font-size: '+(parseInt((cloudData[i].val-min)*75/max)+1)+'pt" title="' + cloudData[i].word +': ' +cloudData[i].raw+ '">'+cloudData[i].word+"</span> ";
214 		}
215 		document.getElementById(config.renderTo ? config.renderTo : "cloud").innerHTML = cloudString
216 		
217 	}
218 	,cachedCloudData : []
219 });