(.+?)
")
self.find_videos = re.compile("
.+?)\" class=l>(?P.+?) (?P.+?) ")
self.find_files = re.compile("\[(?P.{2,5})\] .+?)\" class=l>"+
"(?P.+?)"+
" File Format: (?P .+?)( - | )(.+?) (?P.+?) ")
self.find_recommend = re.compile("Did you mean( to search for)?: (?P.+?)")
self.find_num_results = re.compile("Results [0-9,]+ - [0-9,]+ of about (?P[0-9,]+)")
self.clean_regex = re.compile("(<(/)?(.+?)>|&(.+?);)")
self.results = []
self.query = query
self.offset = 0
self.recommend = ""
self.numResults = 0
self.iterate = 0
self.search(self.query)
def clean(self, string):
    """Return `string` with every match of `self.clean_regex` removed.

    As compiled in the initializer, that pattern matches HTML tags
    (opening and closing) and `&...;` character entities, so the result
    is the plain text of a search-result fragment.
    """
    stripper = self.clean_regex
    # Each match is replaced by the empty string; surrounding text,
    # including whitespace, is left untouched.
    return stripper.sub("", string)
def format_width(self, string, width=60):
    """Wrap `string` for display as an indented block.

    Args:
        string: Text to wrap.
        width: Maximum length of each wrapped line. Defaults to 60,
            the width this method always used before the parameter
            existed.

    Returns:
        The wrapped lines joined with a newline followed by a tab, so
        every continuation line is indented by one tab. The first line
        carries no leading tab; callers add it themselves. An empty
        `string` yields an empty string.
    """
    return "\n\t".join(wrap(string, width))
def search(self, query, offset=0):
    """Perform the search and retrieve the results at `offset`.

    Fetches one result page for `query`, records the page metadata on
    the instance and appends the results extracted from the page to
    `self.results`.

    Args:
        query: Search terms; URL-encoded with `quote_plus` before being
            placed in the request URL.
        offset: Index of the first result to request. Sent as the
            `start` URL parameter only when greater than zero.

    Returns:
        `self.results`, i.e. the accumulated list of all results
        fetched so far, not just those of this page.

    Side effects:
        Sets `self.numResults` to the first match of
        `self.find_num_results` (or 0 when the page has none), sets
        `self.recommend` when the page contains a suggestion, and
        updates `self.query` and `self.offset`.
    """
    start = ""
    if offset > 0:
        start = "&start=%d" % offset
    request = urllib2.Request('http://www.google.com/search?q=' + quote_plus(query) + start)
    self.opener.addheaders = [('User-Agent', 'google-cli')]

    # Close the response even if reading fails so the connection is
    # not leaked.
    response = self.opener.open(request)
    try:
        data = response.read()
    finally:
        response.close()

    # A page without a "Results x - y of about N" banner simply has no
    # match; fall back to 0 without masking unrelated errors.
    totals = self.find_num_results.findall(data)
    self.numResults = totals[0] if totals else 0

    suggestions = self.find_recommend.findall(data)
    if suggestions:
        # The suggestion text is the third captured group of the match.
        self.recommend = suggestions[0][2]

    self.query = query
    self.offset = offset
    self.results.extend(self.extract_results(data))
    return self.results
def search_next(self):
    """Fetch the page that follows the most recently fetched one.

    Re-runs `search` with the stored query and the stored offset
    advanced by 10, and returns whatever `search` returns.
    """
    following_offset = self.offset + 10
    return self.search(self.query, following_offset)
def extract_results(self, data):
    """Extract search results from the retrieved HTML in `data`.

    Runs the web, video and file patterns over `data` and converts each
    match into a dict with the keys 'url', 'title' and 'desc'. Titles
    and descriptions are passed through `self.clean`; URLs are not.

    Returns:
        A list ordered as: file results, then video results, then web
        results.

    NOTE(review): the tuple positions used below refer to the capture
    groups of the patterns compiled in the initializer; confirm them
    against those patterns.
    """
    web_matches = self.find_results.findall(data)
    video_matches = self.find_videos.findall(data)
    file_matches = self.find_files.findall(data)
    strip = self.clean

    extracted = []
    for match in file_matches:
        # File hits get their first group prefixed to the title in
        # square brackets.
        extracted.append({
            'url': match[1],
            'title': "[%s] %s" % (strip(match[0]), strip(match[2])),
            'desc': strip(match[6]),
        })
    for match in video_matches:
        extracted.append({
            'url': match[0],
            'title': strip(match[1]),
            'desc': strip(match[2]),
        })
    for match in web_matches:
        extracted.append({
            'url': match[1],
            'title': strip(match[2]),
            'desc': strip(match[6]),
        })
    return extracted
def show_results(self, index, number):
    """Print `number` results starting at `index` in a readable layout.

    Each result is printed as its absolute position and title on one
    line, followed by the URL and the wrapped description, each
    indented by a tab.
    """
    layout = "%d) %s\n\t%s\n\t%s"
    for position, entry in enumerate(self.get_results(index, number)):
        fields = (
            index + position,
            entry["title"],
            entry["url"],
            self.format_width(entry["desc"]),
        )
        # Single parenthesised argument: prints identically with the
        # Python 2 print statement.
        print(layout % fields)
def get_results(self, index, number):
    """Get and return results from `index` up to `index + number` as a list.

    Further result pages are fetched with `search_next` until enough
    results are loaded to cover the requested range.

    Args:
        index: Position of the first wanted result.
        number: How many results are wanted.

    Returns:
        `self.results[index:index + number]`. The list may be shorter
        than `number` when no further results can be fetched.

    NOTE(review): this body was reconstructed. In the source text the
    loop condition, loop body and return statement were lost, together
    with the header of a following method of which only a trailing
    `return ""` survived. Restore that method from version control.
    """
    wanted = index + number
    while len(self.results) < wanted:
        loaded_before = len(self.results)
        self.search_next()
        # Stop when a fetch adds nothing; otherwise an exhausted result
        # set would keep this loop requesting pages forever.
        if len(self.results) == loaded_before:
            break
    return self.results[index:wanted]
def __len__(self):
    """Return how many results have been collected in `self.results`."""
    collected = self.results
    return len(collected)
def __getitem__(self, key):
    """Return the item or slice of items requested.

    Before indexing, `get_results` is asked to load results up to the
    highest position the request refers to.

    Args:
        key: An index, or a slice-like object exposing `stop`.

    Returns:
        `self.results[key]`.

    Raises:
        IndexError: If `key` is an index that is still out of range
            after loading.
    """
    # Slice-like keys expose `stop`; a plain index is its own upper
    # bound. Only a missing attribute falls back to the key itself.
    high = getattr(key, "stop", key)
    # An open-ended slice has no upper bound to load up to, so it is
    # answered from what is already loaded.
    if high is not None:
        self.get_results(high, 1)
    return self.results[key]
def __str__(self):
    """Render every collected result as text.

    Each result becomes "title (url)" followed by its wrapped
    description on a tab-indented line; results are separated by a
    blank line.
    """
    layout = "%s (%s)\n\t%s"
    rendered = []
    for entry in self.results:
        fields = (entry["title"], entry["url"], self.format_width(entry["desc"]))
        rendered.append(layout % fields)
    return "\n\n".join(rendered)
|
|