<%@page import="java.io.*"%><%@page import="java.util.*"%>
<%
// The parsed search pages and the data-file timestamp they were loaded at are
// cached as application-scope attributes so every request can share them.
final String webInfPath=getServletContext().getRealPath("/WEB-INF");
final File in=new File(webInfPath+File.separator+"search.data");
Object[][] pages=(Object[][]) pageContext.getAttribute("searchPages",PageContext.APPLICATION_SCOPE);
long[] modified=(long[]) pageContext.getAttribute("searchPagesModified",PageContext.APPLICATION_SCOPE);

// A reload is needed when nothing is cached yet, when no timestamp was recorded,
// or when the recorded timestamp differs from the file's current one.
// (Short-circuit order matters: modified[0] is only read when modified is non-null.)
final boolean stale=(pages==null) || (modified==null) || (modified[0]!=in.lastModified());
if(stale)
{
// (re)load the pages and remember the timestamp they correspond to
pages=loadDatafile(in);
pageContext.setAttribute("searchPages",pages,PageContext.APPLICATION_SCOPE);
modified=new long[]{in.lastModified()};
pageContext.setAttribute("searchPagesModified",modified,PageContext.APPLICATION_SCOPE);
}

// "words" is the raw query; a missing parameter is treated as an empty query,
// and wordsSupplied records whether the parameter was present at all.
String words=request.getParameter("words");
boolean wordsSupplied=(words!=null);
if(!wordsSupplied)
{
words="";
}
%>
<%
// A search is performed when the request is a POST or when a "words"
// parameter accompanied the request.
final boolean postedForm="POST".equals(request.getMethod());
if(postedForm || wordsSupplied)
{
// turn the raw query into keywords and search the cached pages with them
String[] keywords=getKeywords(words);
ArrayList results=search(keywords,pages);
%>
0.0D)
{
rank+=usefulness;
String desc=description(contents,keywords,chars).trim();
if(desc.length()==0){continue;}
int length=desc.length();
if(length>chars){length=chars;}
double spout=1.0D*(((double) length)/((double) chars));
rank=rank+spout;
boolean biggest=false;
int position=0;
for(;position="+rank);
if(rank <= thisRank[0])
{
//System.out.println("position "+position);
break;
}
if(position==results.size()-1)
{
biggest=true;
}
}
Object[] record=new Object[]{url,new double[]{rank},desc,title};
if(!biggest)
{
results.add(position,record);
}
else
{
results.add(record);
}
}
}
if(!anything && results.size()==0){anything=true;continue;}
break;
}
return results;
}
public String[] getKeywords(String params)
{
ArrayList[] theLot=parseSearchWords(params);
String[] kw=new String[theLot[0].size()];
for(int i=0;i0)
{
if(plus){words.add(sb.toString());mustHave.add(sb.toString());}
else{mustNotHave.add(sb.toString());}
sb=new StringBuffer();
}
continue;
}
if(c=='\"' && inQuotes==true)
{
inQuotes=false;
if(sb.length()>0)
{
String phrase=sb.toString();
if(plus)
{
StringTokenizer st=new StringTokenizer(phrase,"!�$%^&*()-=_+:@~;\'#|\\<>?,./ ");
while(st.hasMoreTokens())
{
words.add(st.nextToken());
}
mustHave.add(phrase);
}
else
{
mustNotHave.add(phrase);
}
sb=new StringBuffer();
}
plus=true;
continue;
}
if(inQuotes)
{
sb.append(c);
continue;
}
if(c=='+' || c=='-' || Character.isWhitespace(c))
{
if(sb.length()>0)
{
if(plus){words.add(sb.toString());mustHave.add(sb.toString());}
else{mustNotHave.add(sb.toString());}
sb=new StringBuffer();
}
}
if(c=='+')
{
plus=true;
continue;
}
if(c=='-')
{
plus=false;
continue;
}
if(Character.isWhitespace(c))
{
plus=true;
continue;
}
sb.append(c);
}
if(sb.length()>0)
{
if(plus){words.add(sb.toString());mustHave.add(sb.toString());}
else{mustNotHave.add(sb.toString());}
}
return new ArrayList[]{words,mustHave,mustNotHave};
}
/**
 * Calculates the relevance of a page to the given search words and returns it
 * as a score (the local {@code total}).
 *
 * NOTE(review): several loop headers in this method are truncated in this copy
 * of the file (flagged below), so only the visible steps are described:
 * counting keyword occurrences in the text and a final proximity term.
 *
 * @param wordRanks only its length is read in the visible code, in the initial sum loop
 * @param words     the search words
 * @param contents  presumably the page text that is searched; not referenced in the visible lines, TODO confirm
 * @param anything  not referenced in the visible lines, TODO confirm its purpose
 * @return the accumulated score; the last visible contribution is {@code many/outof}
 */
public static double pageRank(String[] wordRanks,String[] words,String contents,boolean anything)
{
// calculate the relevance of this page to these words
// Adds (i+1) for i stepping down from wordRanks.length, one term per entry in words.
// NOTE(review): sum is not read again in the visible lines; presumably a normaliser, confirm.
double sum=0.0D;
for(int i=wordRanks.length;i>wordRanks.length-words.length;i--)
{
sum+=(double) (i+1);
}
double total=0.0D;
int found=0;
// NOTE(review): the next line is truncated in this copy (text between "i" and
// "=lower.length()" is missing); restore the loop headers from the original source.
for(int i=0;i=lower.length()){break;}
// find the next occurrence of `in` within `lower`, starting at offset `at`
int next=lower.indexOf(in,at);
if(next==-1) {break;}
// remember where it was found, continue after the match, and count it for keyword w
positions.add(new int[]{next});
at=next+in.length();
keywordCount[w]++;
}
}
int outof=0; // possible alignment matches
// NOTE(review): truncated line; the visible remainder assigns keywordCount[w] to outof,
// presumably keeping the largest per-keyword count, confirm.
for(int w=0;woutof){outof=keywordCount[w];}
}
//2. for each word see whether they appear close together
int windowDistance=words.length;
// NOTE(review): truncated line; the code that computes `many`, `diff` and declares
// `proximity` is missing from this copy.
for(int i=0;i0 && diff0)
{
//proximity=((double) many)/((double) positions.size());
// proximity is the ratio of `many` to `outof`
proximity=((double) many)/((double) outof);
}
total+=proximity;
return total;
}
/**
 * Builds a short description (excerpt) of {@code text} from the areas in which
 * the search words occur, cuts it to at most {@code max} characters and then
 * wraps each word occurrence between two string literals.
 *
 * NOTE(review): four loop headers in this method are truncated in this copy of
 * the file, and the two literals used for marking up are empty strings here.
 * Both are flagged below; confirm against the original source.
 *
 * @param text  the text the excerpt is taken from
 * @param words the search words to locate and mark up
 * @param max   maximum length of the excerpt before marking up
 * @return the excerpt
 */
public String description(String text,String[] words,int max)
{
// searching is done on a lower-cased copy of the text
String use=text.toLowerCase();
// find an area or areas with these words in
ArrayList areas=new ArrayList();
// NOTE(review): truncated line; the code that declares `word`, `at` and `pos` is missing here.
for(int i=0;i0)
{
// a letter directly before the match means the match is the tail of a longer word: skip it
if( Character.isLetter( use.charAt(pos-1)) )
{
//System.out.println("a substring: skipping");
at=pos+word.length();
continue;
}
}
// take a window of 30 characters before and 50 after the match, clamped to the text
int start=pos-30;
int end=pos+50;
if(start<0) start=0;
if(end>use.length()-1) end=use.length()-1;
// move the start back to the last space at or before it, if there is one
int tryStart=use.lastIndexOf(" ",start);
if(tryStart!=-1)
{
start=tryStart;
}
// move the end forward to the next "." or, failing that, the next space; otherwise keep it
int tryEnd=use.indexOf(".",end);
if(tryEnd==-1)
{
tryEnd=use.indexOf(" ",end);
if(tryEnd==-1) tryEnd=end;
}
end=tryEnd;
// make sure start <= end
if(start>end){int swap=end;end=start;start=swap;}
int[] area=new int[]{start,end};
areas.add(area);
// continue searching after this area
at=end;
}
}
//System.out.println("merging areas");
// NOTE(review): truncated line; the comparison between `area` and `area2` is incomplete here.
for(int i=0;iarea[0] && area2[0]area[1])
{
// extend area to the end of area2 and drop area2; j steps back because the list shrank
area[1]=area2[1];
areas.remove(j);
j--;
}
continue;
}
}
}
//System.out.println("building description");
String description="";
// NOTE(review): truncated line; the code that assembles the areas into `description` is missing here.
for(int i=0;imax)
{
// cut the excerpt down to max characters
description=description.substring(0,max);
}
// lower-cased copy of the excerpt, used for locating the words
String lower=description.toLowerCase();
//System.out.println("marking up");
// NOTE(review): truncated line; the code that declares `word`, `at` and `found` is missing here.
for(int i=0;i=0)
{
// a letter directly before the match means it is inside a longer word: skip it
if( Character.isLetter( lower.charAt(found-1)) )
{
at=found+word.length();
continue;
}
}
// get the word length
// advance `here` over letters and digits, so [found,here) covers the whole word
int here=found;
int length=0;
for(;;)
{
if(here>=lower.length()){break;}
char h=lower.charAt(here);
if(Character.isLetterOrDigit(h))
{
here++;
length++;
continue;
}
break;
}
// rebuild the excerpt with a literal before and after the word.
// NOTE(review): both literals are empty strings in this copy; presumably they held
// opening and closing markup, confirm.
description=description.substring(0,found)+
""+
description.substring(found,here)+
""+
description.substring(here);
// continue after the word plus 7 characters.
// NOTE(review): 7 presumably is the combined length of the two inserted literals;
// with empty literals this skips 7 characters of text, confirm.
at=found+length+7;
// refresh the lower-cased copy so its indices match the modified excerpt
lower=description.toLowerCase();
}
}
return description;
}
%>
<%
// The query text comes straight from the request, so it is HTML-escaped before
// being echoed back; written out raw, markup typed into the search box would be
// interpreted by the browser (reflected cross-site scripting).
StringBuffer escapedWords=new StringBuffer(words.length()+16);
for(int wi=0;wi<words.length();wi++)
{
char wc=words.charAt(wi);
switch(wc)
{
case '&': escapedWords.append("&amp;"); break;
case '<': escapedWords.append("&lt;"); break;
case '>': escapedWords.append("&gt;"); break;
case '"': escapedWords.append("&quot;"); break;
case '\'': escapedWords.append("&#39;"); break;
default: escapedWords.append(wc);
}
}
%>
Searched this site for <%=escapedWords%>.
Results <%if(results.size()>0){%>1-<%=results.size()%><%}else{%>0<%}%>
<%if(results.size()==0){%>No matches<%}%>
<%
// Print the results, walking the list from its last entry to its first.
for(int i=results.size()-1;i>=0;i--)
{
// Each record is an Object[] built as {url, one-element double[] holding the rank, description, title}.
Object[] record=(Object[]) results.get(i);
// NOTE(review): thisRank is not referenced in the visible remainder of this loop.
double[] thisRank=(double[]) record[1];
%>
<%=record[3]%>
<%=record[2]%>
<%=record[0]%>
<%
}
// presumably closes the block opened by the "do a search" condition earlier in the page
}
%>