138 lines
5.4 KiB
Plaintext
138 lines
5.4 KiB
Plaintext
|
#!/usr/bin/python
|
||
|
|
||
|
# Python bindings to the Google search engine
|
||
|
# Copyright (c) 2009-2019, Mario Vilas
|
||
|
# All rights reserved.
|
||
|
#
|
||
|
# Redistribution and use in source and binary forms, with or without
|
||
|
# modification, are permitted provided that the following conditions are met:
|
||
|
#
|
||
|
# * Redistributions of source code must retain the above copyright notice,
|
||
|
# this list of conditions and the following disclaimer.
|
||
|
# * Redistributions in binary form must reproduce the above copyright
|
||
|
# notice, this list of conditions and the following disclaimer in the
|
||
|
# documentation and/or other materials provided with the distribution.
|
||
|
# * Neither the name of the copyright holder nor the names of its
|
||
|
# contributors may be used to endorse or promote products derived from
|
||
|
# this software without specific prior written permission.
|
||
|
#
|
||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
||
|
import sys
|
||
|
|
||
|
from googlesearch import search, get_random_user_agent
|
||
|
|
||
|
# TODO port to argparse
|
||
|
from optparse import OptionParser, IndentedHelpFormatter
|
||
|
|
||
|
|
||
|
class BannerHelpFormatter(IndentedHelpFormatter):
    """Indented optparse help formatter that prints a banner before the usage.

    The banner text is kept on the instance and placed on its own line ahead
    of the usage string produced by ``IndentedHelpFormatter``.
    """

    def __init__(self, banner, *argv, **argd):
        """Store *banner* and forward all other arguments to the base class."""
        # Explicit base-class call (rather than super()) matches how the rest
        # of this class talks to optparse.
        IndentedHelpFormatter.__init__(self, *argv, **argd)
        self.banner = banner

    def format_usage(self, usage):
        """Return the banner, a newline, then the regular usage text."""
        usage_text = IndentedHelpFormatter.format_usage(self, usage)
        return '%s\n%s' % (self.banner, usage_text)
|
||
|
|
||
|
|
||
|
def main():
    """Command line entry point: run a search and print each result URL.

    Parses the command line with optparse, converts the parsed options into
    keyword arguments for ``search()``, and prints every URL it produces,
    one per line.

    Exits with status 2 (after printing the help text) when no query is
    given, and through ``parser.error()`` when ``--type`` is not one of the
    supported search types.
    """

    # Parse the command line arguments.
    formatter = BannerHelpFormatter(
        "Python script to use the Google search engine\n"
        "By Mario Vilas (mvilas at gmail dot com)\n"
        "https://github.com/MarioVilas/googlesearch\n"
    )
    parser = OptionParser(formatter=formatter)
    parser.set_usage("%prog [options] query")
    parser.add_option(
        '--tld', metavar='TLD', type='string', default='com',
        help="top level domain to use [default: com]")
    parser.add_option(
        '--lang', metavar='LANGUAGE', type='string', default='en',
        help="produce results in the given language [default: en]")
    parser.add_option(
        '--domains', metavar='DOMAINS', type='string', default='',
        help="comma separated list of domains to constrain the search to")
    parser.add_option(
        '--tbs', metavar='TBS', type='string', default='0',
        help="produce results from period [default: 0]")
    parser.add_option(
        '--safe', metavar='SAFE', type='string', default='off',
        help="kids safe search [default: off]")
    parser.add_option(
        '--type', metavar='TYPE', type='string', default='search', dest='tpe',
        help="search type (search, images, videos, news, shopping, books,"
             " apps) [default: search]")
    parser.add_option(
        '--country', metavar='COUNTRY', type='string', default='',
        help="region to restrict search on [default: not restricted]")
    parser.add_option(
        '--num', metavar='NUMBER', type='int', default=10,
        help="number of results per page [default: 10]")
    parser.add_option(
        '--start', metavar='NUMBER', type='int', default=0,
        help="first result to retrieve [default: 0]")
    parser.add_option(
        '--stop', metavar='NUMBER', type='int', default=0,
        help="last result to retrieve [default: unlimited]")
    parser.add_option(
        '--pause', metavar='SECONDS', type='float', default=2.0,
        help="pause between HTTP requests [default: 2.0]")
    parser.add_option(
        '--rua', metavar='USERAGENT', action='store_true', default=False,
        help="Randomize the User-Agent [default: no]")
    (options, args) = parser.parse_args()

    # All positional arguments together form the query string.
    query = ' '.join(args)
    if not query:
        parser.print_help()
        sys.exit(2)

    # Every public option destination becomes a keyword argument of search().
    params = dict(
        (k, v) for (k, v) in vars(options).items()
        if not k.startswith('_'))

    # Split the comma separated list of domains, if present.
    # Empty entries are dropped: ''.split(',') is [''], so without the filter
    # the default value (or a stray comma) would send a blank domain to
    # search() instead of no domain at all.
    if 'domains' in params:
        stripped = [x.strip() for x in params['domains'].split(',')]
        params['domains'] = [name for name in stripped if name]

    # Use a special search type if requested.
    if 'tpe' in params:
        valid_types = (
            'search', 'images', 'videos', 'news',
            'shopping', 'books', 'apps')
        tpe = params['tpe']
        if tpe and tpe not in valid_types:
            parser.error("invalid type: %r" % tpe)
        # A plain web search is passed on as an empty type.
        if tpe == 'search':
            params['tpe'] = ''

    # Randomize the user agent if requested.
    # 'rua' is only a command line switch, so it is always removed from the
    # keyword arguments before they are handed to search().
    if params.pop('rua', False):
        params['user_agent'] = get_random_user_agent()

    # Run the query.
    for url in search(query, **params):
        print(url)
        # Best-effort flush so each result shows up as soon as it is found;
        # a failing flush is deliberately ignored.
        try:
            sys.stdout.flush()
        except Exception:
            pass
|
||
|
|
||
|
|
||
|
# Run the command line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|