Commit f9988d96 authored by Romain Bignon

work with browsershots to automatically take screenshots

parent edeb267a
......@@ -2,7 +2,9 @@
# -*- coding: utf-8 -*-
import sys
import os
import urllib
from urlparse import urlparse
import ClientForm
from copy import copy
import mechanize
......@@ -23,6 +25,7 @@ class Match(object):
self.data = None
class Browser(mechanize.Browser):
QUEUEDIR = '/var/lib/browsershot/queue/'
FIRST_STR = '<b>GOATSE</b>'
IMG_URL = 'http://parano.me/h'
FINAL_STR = '<img src=%s />' % IMG_URL
......@@ -108,7 +111,7 @@ class Browser(mechanize.Browser):
def check_image(self, match):
document = lxml.html.parse(match.res)
for img in document.getiterator('img'):
if img.attrib['src'] == self.IMG_URL:
if 'src' in img.attrib and img.attrib['src'] == self.IMG_URL:
parent = img.getparent()
while parent is not None:
if parent.tag == 'body':
......@@ -116,6 +119,18 @@ class Browser(mechanize.Browser):
parent = parent.getparent()
return False
def snapshot(self, match):
    """Queue a screenshot request for the page behind ``match``.

    A request file named after the URL's network location is written
    into ``self.QUEUEDIR``; an existing file with that name is
    overwritten.  The file holds one ``key value`` directive per line
    (request, url, width, height, bpp, browser, command).

    match -- object whose ``url`` attribute is the address to capture.
    """
    domain = urlparse(match.url).netloc
    # The previous ``replace('"', '\"')`` did nothing: in a Python string
    # literal '\"' is just '"'.  Percent-encode the quote instead, which
    # keeps the URL equivalent while removing the raw character.
    # NOTE(review): presumably the goal is to stop the URL breaking out
    # of a quoted argument downstream -- confirm against the queue reader.
    url = match.url.replace('"', '%22')
    with open(os.path.join(self.QUEUEDIR, domain), 'w') as f:
        f.write("""request %s
url %s
width 1024
height 768
bpp 24
browser Iceape
command iceape
""" % (domain, url))
def try_url(self, url):
if url in self.tested:
return
......@@ -146,6 +161,7 @@ class Browser(mechanize.Browser):
with open('matched.txt', 'a') as f:
f.write('%s\n' % (match.url))
print 'FOUND!'
self.snapshot(match)
i += 1
if __name__ == '__main__':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment