<%@page import="java.io.*"%><%@page import="java.util.*"%>
<%
    /*
     * Page set-up.
     *
     * The parsed search index ("searchPages") is cached in application scope
     * together with the timestamp of the data file it was loaded from
     * ("searchPagesModified", a one-element long[]).  The index is (re)loaded
     * when either attribute is missing or when WEB-INF/search.data has a
     * different last-modified time than the one recorded.
     *
     * After this scriptlet the rest of the page can rely on:
     *   pages         - the cached index as returned by loadDatafile(in)
     *   words         - the "words" request parameter, never null
     *   wordsSupplied - true when the request actually carried "words"
     */
    Object[][] pages=(Object[][]) pageContext.getAttribute("searchPages",PageContext.APPLICATION_SCOPE);
    long[] modified=(long[]) pageContext.getAttribute("searchPagesModified",PageContext.APPLICATION_SCOPE);

    String WEB_INF=getServletContext().getRealPath("/WEB-INF");
    File in=new File(WEB_INF+File.separator+"search.data");

    // Read the timestamp exactly once, BEFORE loading.  If the file is
    // rewritten while it is being loaded, the stored stamp is then older than
    // the file and the next request reloads it, instead of the stale index
    // being recorded as up to date.
    long lastModified=in.lastModified();

    boolean isModified=(pages==null) || (modified==null) || (modified[0]!=lastModified);
    if(isModified)
    {
        // (re)load the pages
        pages=loadDatafile(in);
        pageContext.setAttribute("searchPages",pages,PageContext.APPLICATION_SCOPE);
        modified=new long[]{lastModified};
        pageContext.setAttribute("searchPagesModified",modified,PageContext.APPLICATION_SCOPE);
    }

    // A missing parameter is normalised to "" so later code never sees null;
    // wordsSupplied remembers whether the caller really sent a query.
    String words=request.getParameter("words");
    boolean wordsSupplied=(words!=null);
    if(!wordsSupplied)
    {
        words="";
    }
%>
<%
    /*
     * Run the search and render the results.
     *
     * A search is performed for every POST, and for any request that carried
     * a "words" parameter.  Each result record is an Object[] that is read
     * here as: [0] url, [2] description, [3] title.  Records are written out
     * from the last list element to the first.
     */
    if(request.getMethod().equals("POST") || wordsSupplied)
    {
        // do a search
        String[] keywords=getKeywords(words);
        ArrayList results=search(keywords,pages);

        // The query comes straight from the request, so it must be
        // HTML-escaped before being echoed back into the page; otherwise a
        // crafted "words" value could inject markup or script.
        StringBuffer escaped=new StringBuffer(words.length()+16);
        for(int c=0;c<words.length();c++)
        {
            char ch=words.charAt(c);
            switch(ch)
            {
                case '&':  escaped.append("&amp;");  break;
                case '<':  escaped.append("&lt;");   break;
                case '>':  escaped.append("&gt;");   break;
                case '"':  escaped.append("&quot;"); break;
                case '\'': escaped.append("&#39;");  break;
                default:   escaped.append(ch);
            }
        }
        String safeWords=escaped.toString();
%>

Searched this site for <%=safeWords%>.
Results <%if(results.size()>0){%>1-<%=results.size()%><%}else{%>0<%}%>

<%if(results.size()==0){%>No matches<%}%> <%
        for(int i=results.size()-1;i>=0;i--)
        {
            Object[] record=(Object[]) results.get(i);
%> <%=record[3]%>
<%=record[2]%>
<%=record[0]%>

<%
        }
    }
%>

<%!
/*
 * Helper methods for the search page (JSP declaration block).
 *
 * Method headers that are visible in this block:
 *   - private Object[][] loadDatafile(File in)
 *   - public String[] getKeywords(String params)
 *   - public static double pageRank(String[] wordRanks, String[] words,
 *                                   String contents, boolean anything)
 *   - public String description(String text, String[] words, int max)
 *
 * The code also calls readTimes(fis) and parseSearchWords(params); their
 * headers are not visible in the text below.
 *
 * Facts that can be read directly from the code:
 *   - loadDatafile allocates the index as new Object[times][4].
 *   - Each search result is built as
 *       new Object[]{url, new double[]{rank}, desc, title}
 *     and inserted into the result list by comparing rank with the rank of
 *     records already in the list.
 *   - The query parser returns new ArrayList[]{words, mustHave, mustNotHave};
 *     getKeywords sizes its result from element 0 of the array returned by
 *     parseSearchWords.
 *   - In the query parser '+' and whitespace set the "plus" flag, '-' clears
 *     it, and double quotes toggle phrase collection.
 *
 * NOTE(review): the text of this block looks damaged. Fragments such as
 *   for(int i=0;i0.0D)      for(;position="+rank);      for(int i=0;i0) {
 * are not valid Java, which suggests that code between a less-than sign and a
 * later greater-than sign was lost (and with it some complete method headers
 * and bodies). In addition, several line comments share a physical line with
 * the code that follows them, so that code is treated as comment text.
 * TODO: restore this block from a pristine copy before changing any logic.
 */
// methods to load and do searches // load the file: private Object[][] loadDatafile(File in) { Object[][] pages=null; try { FileInputStream fis=new FileInputStream(in); int times=readTimes(fis); // System.out.println("times: "+times); pages=new Object[times][4]; for(int i=0;i0.0D) { rank+=usefulness; String desc=description(contents,keywords,chars).trim(); if(desc.length()==0){continue;} int length=desc.length(); if(length>chars){length=chars;} double spout=1.0D*(((double) length)/((double) chars)); rank=rank+spout; boolean biggest=false; int position=0; for(;position="+rank); if(rank <= thisRank[0]) { //System.out.println("position "+position); break; } if(position==results.size()-1) { biggest=true; } } Object[] record=new Object[]{url,new double[]{rank},desc,title}; if(!biggest) { results.add(position,record); } else { results.add(record); } } } if(!anything && results.size()==0){anything=true;continue;} break; } return results; } public String[] getKeywords(String params) { ArrayList[] theLot=parseSearchWords(params); String[] kw=new String[theLot[0].size()]; for(int i=0;i0) { if(plus){words.add(sb.toString());mustHave.add(sb.toString());} else{mustNotHave.add(sb.toString());} sb=new StringBuffer(); } continue; } if(c=='\"' && inQuotes==true) { inQuotes=false; if(sb.length()>0) { String phrase=sb.toString(); if(plus) { StringTokenizer st=new StringTokenizer(phrase,"!�$%^&*()-=_+:@~;\'#|\\<>?,./ "); while(st.hasMoreTokens()) { words.add(st.nextToken()); } mustHave.add(phrase); } else { mustNotHave.add(phrase); } sb=new StringBuffer(); } plus=true; continue; } if(inQuotes) { sb.append(c); continue; } if(c=='+' || c=='-' || Character.isWhitespace(c)) { if(sb.length()>0) { if(plus){words.add(sb.toString());mustHave.add(sb.toString());} else{mustNotHave.add(sb.toString());} sb=new StringBuffer(); } } if(c=='+') { plus=true; continue; } if(c=='-') { plus=false; continue; } if(Character.isWhitespace(c)) { plus=true; continue; } sb.append(c); } if(sb.length()>0) { 
if(plus){words.add(sb.toString());mustHave.add(sb.toString());} else{mustNotHave.add(sb.toString());} } return new ArrayList[]{words,mustHave,mustNotHave}; } public static double pageRank(String[] wordRanks,String[] words,String contents,boolean anything) { // calculate the relevance of this page to these words double sum=0.0D; for(int i=wordRanks.length;i>wordRanks.length-words.length;i--) { sum+=(double) (i+1); } double total=0.0D; int found=0; for(int i=0;i=lower.length()){break;} int next=lower.indexOf(in,at); if(next==-1) {break;} positions.add(new int[]{next}); at=next+in.length(); keywordCount[w]++; } } int outof=0; // possible alignment matches for(int w=0;woutof){outof=keywordCount[w];} } //2. for each word see whether they appear close together int windowDistance=words.length; for(int i=0;i0 && diff0) { //proximity=((double) many)/((double) positions.size()); proximity=((double) many)/((double) outof); } total+=proximity; return total; } public String description(String text,String[] words,int max) { String use=text.toLowerCase(); // find an area or areas with these words in ArrayList areas=new ArrayList(); for(int i=0;i0) { if( Character.isLetter( use.charAt(pos-1)) ) { //System.out.println("a substring: skipping"); at=pos+word.length(); continue; } } int start=pos-30; int end=pos+50; if(start<0) start=0; if(end>use.length()-1) end=use.length()-1; int tryStart=use.lastIndexOf(" ",start); if(tryStart!=-1) { start=tryStart; } int tryEnd=use.indexOf(".",end); if(tryEnd==-1) { tryEnd=use.indexOf(" ",end); if(tryEnd==-1) tryEnd=end; } end=tryEnd; if(start>end){int swap=end;end=start;start=swap;} int[] area=new int[]{start,end}; areas.add(area); at=end; } } //System.out.println("merging areas"); for(int i=0;iarea[0] && area2[0]area[1]) { area[1]=area2[1]; areas.remove(j); j--; } continue; } } } //System.out.println("building description"); String description=""; for(int i=0;imax) { description=description.substring(0,max); } String 
lower=description.toLowerCase(); //System.out.println("marking up"); for(int i=0;i=0) { if( Character.isLetter( lower.charAt(found-1)) ) { at=found+word.length(); continue; } } // get the word length int here=found; int length=0; for(;;) { if(here>=lower.length()){break;} char h=lower.charAt(here); if(Character.isLetterOrDigit(h)) { here++; length++; continue; } break; } description=description.substring(0,found)+ ""+ description.substring(found,here)+ ""+ description.substring(here); at=found+length+7; lower=description.toLowerCase(); } } return description; } %>