gibney.org : Technology : Javascript : Bookmarklets : Spider


just a logo :)

search:


terms of service



imprint

gibney.org
is powered by m1d1





Spider
(Entry Nr. 328, by user 1 | edit)
/*********************************************************************************
  Info Div
  Later, it will be put into an include file.
*********************************************************************************/

/**
 * Creates an absolutely positioned <div>, fills it with the given HTML and
 * appends it to the document's <body>.
 *
 * @param {string} id      Value for the new element's id attribute.
 * @param {number} posX    Left offset in pixels.
 * @param {number} posY    Top offset in pixels.
 * @param {string} content HTML assigned to innerHTML (must be trusted markup).
 * @returns {HTMLElement} The newly created and attached div.
 */
function createDiv(id, posX, posY, content)
{
  var menuDiv = document.createElement("div");
  menuDiv.id = id;
  menuDiv.style.position = 'absolute';
  menuDiv.style.top = posY + 'px';
  menuDiv.style.left = posX + 'px';
  menuDiv.innerHTML = content;
  document.getElementsByTagName('body').item(0).appendChild(menuDiv);
  return menuDiv;
}

/**
 * Adds the floating info panel to the page and returns its text area.
 *
 * NOTE(review): the HTML inside the original string literals was lost (only the
 * texts "OK" and "X" and the element id "ge_editor_obj_edit" survived). The
 * markup below is a minimal reconstruction: a text area with that id, sized by
 * editorWidth/editorHeight, plus "OK" and "X" links that both simply remove the
 * panel. Confirm against the original page if the links did anything else.
 *
 * @returns {HTMLElement|null} The element with id "ge_editor_obj_edit".
 */
function addEditor() // objId
{
  var id = 'cm1';
  var editorWidth = 800;
  var editorHeight = 600;
  var EditorX = 100;
  var EditorY = 100;

  // Inline handler shared by both links: detach the panel from the document.
  var closeCode = "var e=document.getElementById('" + id + "');" +
                  "if(e&&e.parentNode)e.parentNode.removeChild(e);return false;";

  var content = '<div style="background:#eeeeee;border:1px solid #000000;padding:5px;">\n' +
                '<textarea id="ge_editor_obj_edit" style="width:' + editorWidth +
                'px;height:' + editorHeight + 'px;"></textarea>\n';
  content += '<br>\n';
  content += '<a href="#" onclick="' + closeCode + '">OK</a>';
  content += ' &nbsp; <a href="#" onclick="' + closeCode + '">X</a><br>\n';
  content += '</div>\n';

  var editor = createDiv(id, EditorX, EditorY, content);
  editor.style.zIndex = 9999;

  var r = document.getElementById("ge_editor_obj_edit");
  return r;
}

/*********************************************************************************
  Spider
  Enables you to spider domains.
  Later, it will be put into an include file.
*********************************************************************************/

/**
 * Spider state.
 *
 * Links is used as an associative array: key = URL path found so far,
 * value = status ("new" or "spidered"). Crawling starts at "/".
 */
var Spider = {
  NrPagesScanned: 0,
  Url: "",
  Content: "",
  Links: { "/": "new" }
};

/**
 * Removes the last path segment from a URL and returns the parent directory
 * (with trailing slash), e.g. "/a/b/" -> "/a/". Returns "" when there is no
 * parent left (e.g. for "/").
 *
 * @param {string} Url Directory or file URL.
 * @returns {string} Parent directory URL, or "" if none.
 */
Spider.cutOneFolder = function(Url)
{
  if (Url.substr(Url.length - 1, 1) == "/") Url = Url.substr(0, Url.length - 1);
  var LastSlash = Url.lastIndexOf("/");
  return Url.substr(0, LastSlash + 1);
};

/**
 * Extracts all href targets from this.Content, resolves the site-local ones
 * against this.Url and records every not-yet-known target in this.Links as
 * "new". Finally marks this.Url as "spidered" and counts the page.
 *
 * Skipped: URLs with a scheme (http:, https:, mailto:, javascript:, ...),
 * protocol-relative URLs ("//host/..."), pure fragments, "./", and any link
 * whose path part is shorter than two characters.
 */
Spider.scanPage = function()
{
  Info.value += "Scanning: " + this.Url + "\n";

  // Directory of the current page; "/a/b.html" -> "/a/", "/a/" stays "/a/".
  var PageBase = this.Url.replace(/\/[^\/]+$/, "/");

  // The back-reference makes sure the closing quote matches the opening one,
  // so an empty href="" no longer swallows the following attribute.
  // match() returns null when nothing is found.
  var HREFs = this.Content.match(/href=(["']).*?\1/gi) || [];

  for (var i = 0; i < HREFs.length; i++)
  {
    var HREF = HREFs[i];
    var Link = HREF.substr(6, HREF.length - 7); // strip 'href="' and the closing quote
    var RawLink = Link;
    var BaseUrl = PageBase;

    // Only site-local links are followed.
    if (/^[a-z][a-z0-9+.\-]*:/i.test(Link)) continue;
    if (Link.substr(0, 2) == "//") continue;

    // Drop the fragment first so that "#top" does not become a link to the directory.
    Link = Link.replace(/#.*$/, "");

    if (Link.substr(0, 1) == "/")
    {
      // Root-relative link: resolve against the site root, not the page's folder.
      BaseUrl = "/";
      Link = Link.substr(1);
    }
    else
    {
      while (Link.substr(0, 3) == "../")
      {
        Link = Link.substr(3, Link.length - 3);
        BaseUrl = Spider.cutOneFolder(BaseUrl);
      }
      if (BaseUrl.length < 1)
      {
        // More "../" steps than folders available.
        Info.value += "Defect Link at " + this.Url + " : " + RawLink + "\n";
        continue;
      }
      if (Link == "./") continue;
      if (Link.substr(0, 2) == "./") Link = Link.substr(2, Link.length - 2);
    }

    if (Link.length < 2) continue;

    Link = BaseUrl + Link;
    if (!Object.prototype.hasOwnProperty.call(this.Links, Link)) this.Links[Link] = "new";
  }

  this.Links[this.Url] = "spidered";
  this.NrPagesScanned++;
};

/**
 * Loads Url with a synchronous GET request and stores the response text in
 * this.Content (and the URL in this.Url).
 *
 * @param {string} Url URL to fetch.
 * @throws {Error} If the environment offers no XMLHttpRequest implementation.
 */
Spider.readPage = function(Url)
{
  this.Url = Url;
  var xmlHttp = null;

  // Mozilla, Opera, Safari and Internet Explorer 7
  if (typeof XMLHttpRequest != 'undefined')
    xmlHttp = new XMLHttpRequest();

  if (!xmlHttp)
  {
    // Internet Explorer 6 and older
    try
    {
      xmlHttp = new ActiveXObject("Msxml2.XMLHTTP");
    }
    catch (e)
    {
      try
      {
        xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");
      }
      catch (e2)
      {
        xmlHttp = null;
      }
    }
  }

  if (!xmlHttp) throw new Error("Spider.readPage: no XMLHttpRequest implementation available");

  // Third argument false = synchronous request; send() blocks until the response arrived.
  xmlHttp.open('GET', Url, false);
  xmlHttp.send(null);
  this.Content = xmlHttp.responseText;
};

/**
 * @returns {string|boolean} The first URL in this.Links whose status is "new",
 *                           or false if every known link has been spidered.
 */
Spider.getUnspideredUrl = function()
{
  for (var Url in this.Links)
  {
    if (this.Links[Url] == "new") return Url;
  }
  return false;
};

/**
 * Fetches and scans the next unspidered link and hands the page content to
 * PageFunction.
 *
 * @returns {boolean} true if a page was processed, false if nothing was left.
 */
Spider.spiderNextLink = function()
{
  var Url = this.getUnspideredUrl();
  if (!Url) return false;
  this.readPage(Url);
  this.scanPage();
  PageFunction(this.Content);
  return true;
};

/*********************************************************************************
*********************************************************************************/

// R counts how often each three-word phrase was seen (filled by PageFunction).
var R = {};
// R2 is a scratch copy of R that showResults() consumes while printing.
var R2 = {};
// Info is the text area all output is written to (set by the driver below).
var Info;

/** Replaces the Info text with a list of all known links and their status. */
function showSpiderLinks()
{
  Info.value = "Links:\n";
  for (var Link in Spider.Links)
  {
    var Status = Spider.Links[Link];
    Info.value += Link + " = " + Status + "\n";
  }
}

/**
 * Appends the entry of R2 with the largest positive count to Info and removes
 * it from R2.
 *
 * @returns {boolean} true if an entry was printed, false if R2 has no entry
 *                    with a count greater than 0.
 */
function showAndDestroyBiggestR2()
{
  var big = 0;
  var bigKey = false;
  for (var Key in R2)
  {
    if (R2[Key] > big)
    {
      big = R2[Key];
      bigKey = Key;
    }
  }
  // Compare against the sentinel explicitly; a key may be a falsy string.
  if (bigKey !== false)
  {
    Info.value += bigKey + ":" + R2[bigKey] + "\n";
    delete R2[bigKey];
    return true;
  }
  return false;
}

/**
 * Returns a deep copy of O. Nested objects are copied recursively (arrays end
 * up as plain objects with index keys); null and primitives are copied as-is.
 *
 * @param {Object} O Object to copy.
 * @returns {Object} The copy.
 */
function copyObject(O)
{
  var r = {};
  for (var i in O)
  {
    // typeof null is 'object' too, so null must be excluded explicitly.
    if (O[i] !== null && typeof O[i] == 'object') r[i] = copyObject(O[i]);
    else r[i] = O[i];
  }
  return r;
}

/** Replaces the Info text with all phrases from R, most frequent first. */
function showResults()
{
  R2 = copyObject(R); // print from a copy so that R survives
  Info.value = "Results:\n\n";
  while (showAndDestroyBiggestR2()) { /* each call prints and removes one entry */ }
}

/**
 * Per-page hook of the spider: strips tags from the page source and counts
 * every three-word phrase (letters a-z only, separated by single spaces) in R.
 *
 * @param {string} B Raw page source.
 */
function PageFunction(B)
{
  var C = B.replace(/<.*?>/g, " ");
  var Words = C.match(/[^a-z][a-z]+ [a-z]+ [a-z]+[^a-z]/gi) || []; // match() yields null if nothing found
  for (var k = 0; k < Words.length; k++)
  {
    // Cut off the delimiter characters on both sides of the phrase.
    var Word = Words[k].match(/[a-z]+ [a-z]+ [a-z]+/gi)[0];
    if (R[Word]) R[Word]++;
    else R[Word] = 1;
  }
}

// Driver: only meaningful inside a browser page, so it is skipped where no DOM exists.
if (typeof document != 'undefined')
{
  Info = addEditor();
  for (var i = 0; i < 20; i++)
  {
    Spider.spiderNextLink();
    showSpiderLinks();
    Info.value += i + "\n";
  }
}
Create a new entry at this position:

Title:
A short summary:
Entry text:
Text-Mode:
!