// Spider (Entry Nr. 328, by user 1)
/*********************************************************************************
Info Div
Later, it will be put into an include file.
*********************************************************************************/
/**
 * Creates an absolutely positioned <div>, fills it with markup and appends it
 * to the document's first <body> element.
 *
 * @param {string} id      Value assigned to the new element's id.
 * @param {number} posX    Left offset; "px" is appended.
 * @param {number} posY    Top offset; "px" is appended.
 * @param {string} content Markup assigned to the element's innerHTML.
 * @returns {HTMLElement} The div that was appended.
 */
function createDiv(id,posX,posY,content)
{
  var box=document.createElement("div");
  var css=box.style;

  box.id=id;
  css.position='absolute';
  css.top=posY+'px';
  css.left=posX+'px';
  box.innerHTML=content;

  var bodyNode=document.getElementsByTagName('body').item(0);
  bodyNode.appendChild(box);
  return box;
}
/**
 * Creates the floating info box (a div with id "cm1" at 100/100, z-index 9999)
 * via createDiv and returns the text area inside it.
 *
 * The markup must contain an element with id "ge_editor_obj_edit": that element
 * is what this function returns, and the rest of the script writes to its
 * .value property. Without it getElementById yields null and every caller fails.
 *
 * @returns {HTMLElement|null} The element with id "ge_editor_obj_edit".
 */
function addEditor() // objId
{
  var id = 'cm1';
  var editorWidth=800;
  var editorHeight=600;
  var EditorX=100;
  var EditorY=100;
  // NOTE(review): the original markup was lost; this is the minimal structure the
  // code below requires (a text area carrying the looked-up id, sized by the
  // width/height variables) - confirm against the intended layout.
  var content =
    '<div>'+
    '<textarea id="ge_editor_obj_edit" style="width:'+editorWidth+'px;height:'+editorHeight+'px;"></textarea>'+
    '</div>';
  content+='<div>';
  // NOTE(review): "OK" and "X" were presumably clickable controls; their handlers
  // are unknown, so the labels are kept as plain text.
  content+=' OK';
  content+=' X';
  content+='</div>';
  var editor=createDiv(id,EditorX,EditorY,content);
  // Keep the box above the rest of the page.
  editor.style.zIndex=9999;
  var r=document.getElementById("ge_editor_obj_edit");
  return r;
}
/*********************************************************************************
Spider
Enables you to spider domains.
Later, it will be put into an include file.
*********************************************************************************/
/**
 * Global crawler state. The Spider.* functions defined below operate on this
 * object through `this`.
 *
 * Declared with `var` so the script also loads in strict mode, where assigning
 * to an undeclared name throws a ReferenceError.
 */
var Spider={
  // Incremented by scanPage once per scanned page.
  NrPagesScanned:0,
  // URL passed to the most recent readPage call.
  Url:"",
  // Response text stored by the most recent readPage call.
  Content:"",
  // Links is used like an associative array (a plain Object behaves very much
  // like one): the key is a link the Spider has found so far, the value is its
  // status ("new" or "spidered"). Crawling starts at the site root.
  Links:{"/":"new"}
};
/**
 * Drops the last path segment of a folder URL.
 *
 * One trailing "/" is ignored first; the result is everything up to and
 * including the last remaining "/", or "" when no slash is left.
 * Example: "/a/b/" -> "/a/", "/a/" -> "/", "/" -> "".
 *
 * @param {string} Url Folder URL to shorten.
 * @returns {string} The parent folder URL.
 */
Spider.cutOneFolder=function(Url)
{
  var trimmed=(Url.charAt(Url.length-1)=="/") ? Url.slice(0,-1) : Url;
  return trimmed.slice(0,trimmed.lastIndexOf("/")+1);
};
/**
 * Extracts the href targets from this.Content, resolves them against this.Url
 * and records every link not seen before in this.Links with status "new".
 * Afterwards this.Url is marked "spidered" and NrPagesScanned is incremented.
 * Progress and defect messages are appended to Info.value.
 *
 * Link handling:
 *  - links with a URI scheme (http:, https:, mailto:, javascript:, ...) or a
 *    protocol-relative "//host" prefix are skipped; only same-site paths are kept;
 *  - the "#fragment" part is removed before any other check, so a pure
 *    in-page anchor does not queue the containing folder;
 *  - a leading "/" resolves against the site root, everything else against
 *    the folder of the current page, honouring "./" and "../";
 *  - a link that climbs above the root with "../" is reported as defect;
 *  - remainders shorter than two characters (e.g. "", ".", "/") are ignored.
 */
Spider.scanPage=function()
{
  Info.value+="Scanning: "+this.Url+"\n";
  // Folder of the current page: everything up to and including the last "/".
  var PageBase=this.Url.replace(/\/[^\/]+$/,"/");
  // \1 makes the closing quote match the opening one, so a value may contain
  // the other quote character.
  var HrefPattern=/href=(["'])(.+?)\1/gi;
  var SchemePattern=/^[a-z][a-z0-9+.\-]*:/i;
  var Match;
  while ((Match=HrefPattern.exec(this.Content))!==null)
  {
    var RawLink=Match[2];
    var Link=RawLink;
    var BaseUrl=PageBase;
    if (SchemePattern.test(Link) || Link.substr(0,2)=="//") continue;
    Link=Link.replace(/#.*$/,"");
    if (Link.charAt(0)=="/")
    {
      // Root-relative link: independent of the current folder.
      BaseUrl="/";
      Link=Link.substr(1);
    }
    else
    {
      while (Link.substr(0,3)=="../")
      {
        Link=Link.substr(3,Link.length-3);
        BaseUrl=Spider.cutOneFolder(BaseUrl);
      }
      if (BaseUrl.length<1)
      {
        Info.value+="Defect Link at "+this.Url+" : "+RawLink+"\n";
        continue;
      }
      if (Link=="./") continue;
      if (Link.substr(0,2)=="./") Link=Link.substr(2,Link.length-2);
    }
    if (Link.length<2) continue;
    Link=BaseUrl+Link;
    if (!Object.prototype.hasOwnProperty.call(this.Links,Link)) this.Links[Link]="new";
  }
  this.Links[this.Url]="spidered";
  this.NrPagesScanned++;
};
/**
 * Fetches Url with a synchronous GET request and stores the result on the
 * Spider: this.Url receives the requested URL, this.Content the response text.
 *
 * @param {string} Url Address of the page to load.
 * @throws {Error} When the environment offers neither XMLHttpRequest nor one
 *                 of the ActiveX fallbacks.
 */
Spider.readPage=function(Url)
{
  this.Url=Url;
  var xmlHttp = null;
  // Mozilla, Opera, Safari and Internet Explorer 7
  if (typeof XMLHttpRequest != 'undefined') xmlHttp = new XMLHttpRequest();
  if (!xmlHttp) {
    // Internet Explorer 6 and older
    try {
      xmlHttp = new ActiveXObject("Msxml2.XMLHTTP");
    } catch(e) {
      try {
        xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");
      } catch(e2) {
        xmlHttp = null;
      }
    }
  }
  // Fail with a clear message instead of a TypeError on null.open().
  if (!xmlHttp) throw new Error("Spider.readPage: no XMLHttpRequest implementation available");
  // Third argument false = synchronous: spiderNextLink scans this.Content
  // immediately after readPage returns, so the response must be complete here.
  xmlHttp.open('GET', Url, false);
  xmlHttp.send(null);
  this.Content=xmlHttp.responseText;
};
/**
 * Looks up the next link that still has to be crawled.
 *
 * @returns {string|boolean} The first key of this.Links (in enumeration order)
 *                           whose status is "new", or false when none is left.
 */
Spider.getUnspideredUrl=function()
{
  var pending=false;
  for (var candidate in this.Links)
  {
    if (this.Links[candidate]!="new") continue;
    pending=candidate;
    break;
  }
  return pending;
};
/**
 * Processes one pending link: loads it with readPage, extracts its links with
 * scanPage and hands the page content to the global PageFunction.
 *
 * @returns {boolean} true when a page was processed, false when
 *                    getUnspideredUrl reported nothing left to crawl.
 */
Spider.spiderNextLink=function()
{
  var next=this.getUnspideredUrl();
  if (next)
  {
    this.readPage(next);
    this.scanPage();
    PageFunction(this.Content);
    return true;
  }
  return false;
};
/*********************************************************************************
*********************************************************************************/
/**
 * Replaces the content of the Info box with a "Links:" heading followed by one
 * "<link> = <status>" line for every entry of Spider.Links.
 */
function showSpiderLinks()
{
  var known=Spider.Links;
  Info.value="Links:\n";
  for (var href in known)
  {
    Info.value+=href+" = "+known[href]+"\n";
  }
}
/**
 * Finds the entry of the global R2 with the largest value above zero (the
 * first one wins on ties), appends "<key>:<value>" plus a newline to
 * Info.value and removes that entry from R2.
 *
 * @returns {boolean} true when an entry was printed and removed, false when
 *                    R2 holds no entry with a value greater than zero.
 */
function showAndDestroyBiggestR2()
{
  var topCount=0;
  var topKey=false;
  for (var candidate in R2)
  {
    var count=R2[candidate];
    if (count>topCount)
    {
      topCount=count;
      topKey=candidate;
    }
  }
  if (!topKey) return false;
  Info.value+=topKey+":"+R2[topKey]+"\n";
  delete R2[topKey];
  return true;
}
/**
 * Returns a deep copy of O.
 *
 * Every enumerable property is copied; nested objects and arrays are copied
 * recursively. Arrays stay arrays, and null values are copied as null
 * (typeof null is 'object', so it needs an explicit check).
 * Cyclic structures are not supported - they would recurse without end.
 *
 * @param {Object|Array} O Value to copy.
 * @returns {Object|Array} A new array when O is an array, otherwise a new
 *                         plain object.
 */
function copyObject(O)
{
  // toString check instead of Array.isArray to stay usable in the old
  // browsers this script otherwise supports.
  var r = (Object.prototype.toString.call(O) == '[object Array]') ? [] : {};
  for (var i in O)
  {
    var Value = O[i];
    if (Value !== null && typeof Value == 'object')
    {
      r[i] = copyObject(Value);
    }
    else r[i] = Value;
  }
  return r;
}
/**
 * Writes the counters collected in the global R to the Info box, biggest
 * value first, under a "Results:" heading. R itself is left untouched: the
 * output loop works on a copy and empties that copy entry by entry.
 */
function showResults()
{
  // R2 is assigned without a local declaration on purpose:
  // showAndDestroyBiggestR2 reads and empties it as a global.
  R2=copyObject(R);
  Info.value="Results:\n\n";
  // All work happens in the condition; the empty body replaces a dummy
  // assignment that leaked a global variable.
  while (showAndDestroyBiggestR2()) {}
}
/**
 * Counts three-word phrases of a page in the global R (phrase -> number of
 * occurrences).
 *
 * Tags are replaced by a blank first. A phrase is three runs of letters a-z
 * (case-insensitive) separated by single blanks, with a non-letter character
 * directly before and after it. Those two delimiters are consumed by each
 * match, so a phrase at the very start or end of the text, or one that shares
 * its delimiter with the previous match, is not counted.
 *
 * @param {string} B Page source (HTML).
 */
function PageFunction(B)
{
  var Text=B.replace(/<.*?>/g," ");
  // The capture group yields the phrase without its delimiters.
  var Phrase=/[^a-z]([a-z]+ [a-z]+ [a-z]+)[^a-z]/gi;
  var Match;
  while ((Match=Phrase.exec(Text))!==null)
  {
    var Word=Match[1];
    if (R[Word]) R[Word]++; else R[Word]=1;
  }
}
// Phrase counters filled by PageFunction (phrase -> number of occurrences).
// Declared with var so the script does not rely on implicit globals.
var R={};
// Text area returned by addEditor; all status output is written to Info.value.
var Info=addEditor();
// Crawl at most 20 pages. After every step the list of known links is shown,
// followed by the step number.
for (var i=0;i<20;i++)
{
  Spider.spiderNextLink();
  showSpiderLinks();
  Info.value+=i+"\n";
}