#!/usr/bin/python

# Python bindings to the Google search engine
# Copyright (c) 2009-2019, Mario Vilas
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the copyright holder nor the names of its
#       contributors may be used to endorse or promote products derived from
#       this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import sys

from googlesearch import search, get_random_user_agent

# TODO port to argparse
from optparse import OptionParser, IndentedHelpFormatter


class BannerHelpFormatter(IndentedHelpFormatter):
    """Just a small tweak to optparse to be able to print a banner.

    The banner text is placed on its own line(s) directly before the usage
    message produced by the stock formatter.
    """

    def __init__(self, banner, *argv, **argd):
        """Remember the banner and hand every other argument to optparse.

        :param banner: Text to print ahead of the usage message.
        :param argv: Positional arguments for ``IndentedHelpFormatter``.
        :param argd: Keyword arguments for ``IndentedHelpFormatter``.
        """
        self.banner = banner
        IndentedHelpFormatter.__init__(self, *argv, **argd)

    def format_usage(self, usage):
        """Return the banner, a newline, then the regular usage message.

        :param usage: Usage string, as passed in by optparse.
        """
        msg = IndentedHelpFormatter.format_usage(self, usage)
        return '%s\n%s' % (self.banner, msg)


def main():
    """Command line entry point: run a search and print one URL per line.

    The positional arguments are joined with spaces to form the query; the
    options are forwarded to ``search()`` as keyword arguments. When no
    query is given the help text is printed and the process exits with
    status 2. An unknown ``--type`` value is reported via ``parser.error``.
    """

    # Parse the command line arguments.
    formatter = BannerHelpFormatter(
        "Python script to use the Google search engine\n"
        "By Mario Vilas (mvilas at gmail dot com)\n"
        "https://github.com/MarioVilas/googlesearch\n"
    )
    parser = OptionParser(formatter=formatter)
    parser.set_usage("%prog [options] query")
    parser.add_option(
        '--tld', metavar='TLD', type='string', default='com',
        help="top level domain to use [default: com]")
    parser.add_option(
        '--lang', metavar='LANGUAGE', type='string', default='en',
        help="produce results in the given language [default: en]")
    parser.add_option(
        '--domains', metavar='DOMAINS', type='string', default='',
        help="comma separated list of domains to constrain the search to")
    parser.add_option(
        '--tbs', metavar='TBS', type='string', default='0',
        help="produce results from period [default: 0]")
    parser.add_option(
        '--safe', metavar='SAFE', type='string', default='off',
        help="kids safe search [default: off]")
    parser.add_option(
        '--type', metavar='TYPE', type='string', default='search', dest='tpe',
        help="search type (search, images, videos, news, shopping, books,"
             " apps) [default: search]")
    parser.add_option(
        '--country', metavar='COUNTRY', type='string', default='',
        help="region to restrict search on [default: not restricted]")
    parser.add_option(
        '--num', metavar='NUMBER', type='int', default=10,
        help="number of results per page [default: 10]")
    parser.add_option(
        '--start', metavar='NUMBER', type='int', default=0,
        help="first result to retrieve [default: 0]")
    parser.add_option(
        '--stop', metavar='NUMBER', type='int', default=0,
        help="last result to retrieve [default: unlimited]")
    parser.add_option(
        '--pause', metavar='SECONDS', type='float', default=2.0,
        help="pause between HTTP requests [default: 2.0]")
    parser.add_option(
        '--rua', metavar='USERAGENT', action='store_true', default=False,
        help="Randomize the User-Agent [default: no]")
    (options, args) = parser.parse_args()

    # Everything that is not an option makes up the query.
    query = ' '.join(args)
    if not query:
        parser.print_help()
        sys.exit(2)

    # Turn the parsed options into keyword arguments for search(),
    # leaving out any private attribute of the options object.
    params = {
        k: v for (k, v) in vars(options).items() if not k.startswith('_')}

    # Split the comma separated list of domains, if present.
    # Blank entries are dropped: the default value '' (and stray commas
    # such as "a.com,") would otherwise yield an empty domain name, i.e. a
    # non-empty list like [''] even though no domain was actually given.
    raw_domains = params.get('domains') or ''
    params['domains'] = [
        name for name in (x.strip() for x in raw_domains.split(','))
        if name]

    # Use a special search type if requested.
    if 'tpe' in params:
        tpe = params['tpe']
        if tpe and tpe not in (
                'search', 'images', 'videos', 'news',
                'shopping', 'books', 'apps'):
            parser.error("invalid type: %r" % tpe)
        # The plain web search is requested with an empty type.
        if tpe == 'search':
            params['tpe'] = ''

    # Randomize the user agent if requested.
    # The 'rua' flag itself is always removed, it is not passed to search().
    if 'rua' in params and params.pop('rua'):
        params['user_agent'] = get_random_user_agent()

    # Run the query.
    for url in search(query, **params):
        print(url)
        # Flush after every result so each URL shows up as soon as it is
        # printed. Flushing is best effort: a failure here is deliberately
        # ignored rather than aborting the search.
        try:
            sys.stdout.flush()
        except Exception:
            pass


if __name__ == '__main__':
    main()