22 changed files with 225 additions and 605 deletions
--- a/devwiki.py
+++ b/devwiki.py
@@ -1,164 +1,56 @@
 #!/usr/bin/env python
 """
-devwiki.py - Phenny Wiki Module
-Copyright 2008-9, Sean B. Palmer, inamidst.com
-Modified by Sfan5 2013
-Licensed under the Eiffel Forum License 2.
-
-http://inamidst.com/phenny/
+wiki.py - Phenny Wiki Module
+Copyright 2014, sfan5
 """
 
-import re, urllib, gzip, StringIO
+import re
+import web
 
-devwikiuri = 'http://dev.minetest.net/%s'
-
-r_tr = re.compile(r'(?ims)<tr[^>]*>.*?</tr>')
-r_paragraph = re.compile(r'(?ims)<p[^>]*>.*?</p>|<li(?!n)[^>]*>.*?</li>')
-r_tag = re.compile(r'<(?!!)[^>]+>')
-r_whitespace = re.compile(r'[\t\r\n ]+')
-r_redirect = re.compile(
-    r'(?ims)class=.redirectText.>\s*<a\s*href=./wiki/([^"/]+)'
-)
-
-abbrs = ['etc', 'ca', 'cf', 'Co', 'Ltd', 'Inc', 'Mt', 'Mr', 'Mrs',
-         'Dr', 'Ms', 'Rev', 'Fr', 'St', 'Sgt', 'pron', 'approx', 'lit',
-         'syn', 'transl', 'sess', 'fl', 'Op', 'Dec', 'Brig', 'Gen'] \
-    + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') \
-    + list('abcdefghijklmnopqrstuvwxyz')
-t_sentence = r'^.{5,}?(?<!\b%s)(?:\.(?=[\[ ][A-Z0-9]|\Z)|\Z)'
-r_sentence = re.compile(t_sentence % r')(?<!\b'.join(abbrs))
-
-def unescape(s):
-    s = s.replace('&gt;', '>')
-    s = s.replace('&lt;', '<')
-    s = s.replace('&amp;', '&')
-    s = s.replace('&nbsp;', ' ')
-    return s
-
-def text(html):
-    html = r_tag.sub('', html)
-    html = r_whitespace.sub(' ', html)
-    return unescape(html).strip()
-
-def devwikipedia(term, language='en', last=False):
-    global devwikiuri
-    if not '%' in term:
-        if isinstance(term, unicode):
-            t = term.encode('utf-8')
-        else: t = term
-        q = urllib.quote(t)
-        u = devwikiuri % (q)
-        bytes = web.get(u)
-    else: bytes = web.get(devwikiuri % (term))
-
-    if bytes.startswith('\x1f\x8b\x08\x00\x00\x00\x00\x00'):
-        f = StringIO.StringIO(bytes)
-        f.seek(0)
-        gzip_file = gzip.GzipFile(fileobj=f)
-        bytes = gzip_file.read()
-        gzip_file.close()
-        f.close()
-
-    bytes = r_tr.sub('', bytes)
-
-    if not last:
-        r = r_redirect.search(bytes[:4096])
-        if r:
-            term = urllib.unquote(r.group(1))
-            return devwikipedia(term, language=language, last=True)
-
-    paragraphs = r_paragraph.findall(bytes)
-
-    if not paragraphs:
-        if not last:
-            term = search(term)
-            return devwikipedia(term, language=language, last=True)
-        return None
-
-    # Pre-process
-    paragraphs = [para for para in paragraphs
-                  if (para and 'technical limitations' not in para
-                      and 'window.showTocToggle' not in para
-                      and 'Deletion_policy' not in para
-                      and 'Template:AfD_footer' not in para
-                      and not (para.startswith('<p><i>') and
-                               para.endswith('</i></p>'))
-                      and not 'disambiguation)"' in para)
-                      and not '(images and media)' in para
-                      and not 'This article contains a' in para
-                      and not 'id="coordinates"' in para
-                      and not 'class="thumb' in para]
-#                     and not 'style="display:none"' in para]
-
-    for i, para in enumerate(paragraphs):
-        para = para.replace('<sup>', '|')
-        para = para.replace('</sup>', '|')
-        paragraphs[i] = text(para).strip()
-
-    # Post-process
-    paragraphs = [para for para in paragraphs if
-                  (para and not (para.endswith(':') and len(para) < 150))]
-
-    para = text(paragraphs[0])
-    m = r_sentence.match(para)
-
-    if not m:
-        if not last:
-            term = search(term)
-            return devwikipedia(term, language=language, last=True)
-        return None
-    sentence = m.group(0)
-
-    maxlength = 275
-    if len(sentence) > maxlength:
-        sentence = sentence[:maxlength]
-        words = sentence[:-5].split(' ')
-        words.pop()
-        sentence = ' '.join(words) + ' [...]'
-
-    if (('using the Article Wizard if you wish' in sentence)
-            or ('or add a request for it' in sentence)
-            or ('in existing articles' in sentence)):
-        if not last:
-            term = search(term)
-            return devwikipedia(term, language=language, last=True)
-        return None
-
-    sentence = '"' + sentence.replace('"', "'") + '"'
-    sentence = sentence.decode('utf-8').encode('utf-8')
-    devwikiuri = devwikiuri.decode('utf-8').encode('utf-8')
-    term = term.decode('utf-8').encode('utf-8')
-    return sentence + ' - ' + (devwikiuri % (term))
-
-def devwik(phenny, input):
-    origterm = input.groups()[1]
-    if not origterm:
-        return phenny.say('Perhaps you meant "!devwik Zen"?')
-    origterm = origterm.encode('utf-8')
-    log.log("event", "%s queried Devwiki for '%s'" % (log.fmt_user(input), origterm), phenny)
-
-    term = urllib.unquote(origterm)
-    language = 'en'
-    if term.startswith(':') and (' ' in term):
-        a, b = term.split(' ', 1)
-        a = a.lstrip(':')
-        if a.isalpha():
-            language, term = a, b
-    term = term.replace(' ', '_')
-
-    try: result = devwikipedia(term, language)
-    except IOError:
-        args = (language, devwikiuri % (term))
-        error = "Can't connect to dev.minetest.net (%s)" % args
-        return phenny.say(error)
-
-    if result is not None:
-        phenny.say(result)
-    else: phenny.say('Can\'t find anything in Dev Wiki for "%s".' % origterm)
-
-devwik.commands = ['dev', 'devwik', 'devwiki']
-devwik.priority = 'high'
+import urllib.parse
+
+wikiuri_g = 'http://dev.minetest.net/index.php?title=%s&printable=yes'
+wikiuri_r = 'http://dev.minetest.net/index.php?title=%s'
+
+r_content = re.compile(r'(?i)<div[^>]+class=.mw-content-ltr.>')
+r_paragraph = re.compile(r'(?ims)<p>(.+?)</p>')
+r_sentenceend = re.compile(r'\.[^\.]')
+transforms = [
+    re.compile(r'(?i)<a [^>]+>(.+?)</a>'),
+    re.compile(r'(?i)<b>(.+?)</b>'),
+    re.compile(r'(?i)<i>(.+?)</i>'),
+]
+
+def wiki(phenny, input):
+    term = input.group(2)
+    if not term:
+        return
+
+    log.log("event", "%s queried Developer Wiki for '%s'" % (log.fmt_user(input), term), phenny)
+    term = web.urlencode(term)
+
+    data, scode = web.get(wikiuri_g % term)
+    if scode == 404:
+        return phenny.say("No such page.")
+    data = str(data, "utf-8")
+
+    m = re.search(r_content, data)
+    if not m:
+        return phenny.say("Sorry, did not find anything.")
+    data = data[m.span()[1]:]
+
+    m = re.search(r_paragraph, data)
+    if not m:
+        return phenny.say("Sorry, did not find anything.")
+    data = m.group(1)
+    for transform in transforms:
+        data = re.sub(transform, '\g<1>', data)
+    m = re.search(r_sentenceend, data)
+    if m:
+        data = data[:m.span()[1]-1]
+    phenny.say('"%s" - %s ' % (web.decode(data), wikiuri_r % term))
+
+wiki.commands = ['devwik', 'devwiki']
+wiki.priority = 'high'
 
 if __name__ == '__main__':
-    print __doc__.strip()
+    print(__doc__.strip())
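The rewritten module's cleanup step can be exercised standalone. A minimal sketch, assuming the same transforms and r_sentenceend regexes added above; the sample HTML is invented for illustration:

import re

# Same patterns as in the new devwiki.py above.
transforms = [
    re.compile(r'(?i)<a [^>]+>(.+?)</a>'),
    re.compile(r'(?i)<b>(.+?)</b>'),
    re.compile(r'(?i)<i>(.+?)</i>'),
]
r_sentenceend = re.compile(r'\.[^\.]')

# Invented sample paragraph, roughly as MediaWiki would render it.
data = 'The <b>engine</b> is written in C++. See <a href="/Dev_tools">dev tools</a>.'
for transform in transforms:
    data = re.sub(transform, r'\g<1>', data)  # keep the inner text, drop the tag
m = re.search(r_sentenceend, data)  # first '.' not followed by another '.'
if m:
    data = data[:m.span()[1] - 1]   # cut right after that period
print(data)  # -> The engine is written in C++.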
--- a/shorten.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env python
-"""
-shorten.py - URL Shorten Module
-Copyright 2013, sfan5
-"""
-import urllib
-
-def shorten(phenny, input):
-    arg = input.group(2)
-    if not arg:
-        arg = ""  # Function continues and prints Help Message
-    arg = arg.split(' ')
-    if len(arg) < 2:
-        phenny.reply("Give me an url shorten service and an address")
-        return phenny.reply("Supported Services: is.gd, v.gd")
-    else:
-        if arg[0].lower() == "is.gd":
-            p = urllib.urlencode({'format': "simple", 'url': arg[1]})
-            try:
-                u = urllib.urlopen("http://is.gd/create.php?%s" % p)
-                return phenny.reply(u.read())
-            except:
-                return phenny.reply("Problems accessing is.gd, please try a different Service")
-        if arg[0].lower() == "v.gd":
-            p = urllib.urlencode({'format': "simple", 'url': arg[1]})
-            try:
-                u = urllib.urlopen("http://v.gd/create.php?%s" % p)
-                return phenny.reply(u.read())
-            except:
-                return phenny.reply("Problems accessing v.gd, please try a different Service")
-        return phenny.reply("Unknown Service")
-
-
-shorten.commands = ['shorten', 'sh']
-shorten.thread = True
-
-if __name__ == '__main__':
-    print __doc__
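The deleted module only ever called the create.php endpoints with format=simple, which makes the response body the short URL itself. Should the command come back, the same request in Python 3 looks roughly like this (a sketch; shorten_url is a hypothetical helper, not part of any commit):

import urllib.parse
import urllib.request

def shorten_url(service, url):
    # service is "is.gd" or "v.gd"; both expose the same endpoint.
    params = urllib.parse.urlencode({'format': 'simple', 'url': url})
    with urllib.request.urlopen('http://%s/create.php?%s' % (service, params)) as u:
        return u.read().decode('utf-8')

# e.g. shorten_url('is.gd', 'http://wiki.minetest.net/Main_Page')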
--- a/title.py
+++ b/title.py
@@ -1,100 +1,46 @@
 #!/usr/bin/env python
 """
 title.py - Phenny URL Title Module
-Copyright 2008, Sean B. Palmer, inamidst.com
-Modified by sfan5, 2013
-Licensed under the Eiffel Forum License 2.
-
-http://inamidst.com/phenny/
+Copyright 2014, sfan5
 """
 
-import re, urllib2, urlparse
-
-r_title = re.compile(r'(?ims)<title[^>]*>(.*?)</title\s*>')
-
-def f_title(phenny, input):
-    uri = input.group(2)
-    uri = (uri or '').encode('utf-8')
-
-    if not uri and hasattr(phenny.bot, 'last_seen_uri'):
-        uri = phenny.bot.last_seen_uri
-    if not uri:
-        return phenny.reply('I need a URI to give the title of...')
-
-    if not ':' in uri:
-        uri = 'http://' + uri
-
+import re
+import web
+
+r_title = re.compile(r'(?ims)<\s*title[^>]*>(.*?)<\s*/\s*title\s*>')
+
+def title(phenny, input):
+    uri = input.group(2).strip()
+    if uri:
+        pass
+    elif hasattr(phenny.bot, 'last_seen_uri'):
+        uri = phenny.bot.last_seen_uri
+    else:
+        return phenny.reply("Give me an URI..")
+    data, sc = web.get(uri, 4096)
+    if sc != 200:
+        return phenny.say("HTTP error %d" % sc)
     try:
-        redirects = 0
-        while True:
-            headers = {
-                'Accept': 'text/html',
-                'User-Agent': 'Mozilla/5.0 (MinetestBot)'
-            }
-            req = urllib2.Request(uri, headers=headers)
-            u = urllib2.urlopen(req)
-            info = u.info()
-            u.close()
-
-            if not isinstance(info, list):
-                status = '200'
-            else:
-                status = str(info[1])
-                info = info[0]
-            if status.startswith('3'):
-                uri = urlparse.urljoin(uri, info['Location'])
-            else: break
-
-            redirects += 1
-            if redirects >= 20:
-                return phenny.reply("Too many redirects")
-
-        try: mtype = info['content-type']
-        except:
-            return phenny.reply("Couldn't get the Content-Type, sorry")
-        if not (('/html' in mtype) or ('/xhtml' in mtype)):
-            return phenny.reply("Document isn't HTML")
-
-        u = urllib2.urlopen(req)
-        bytes = u.read(262144)
-        u.close()
-
-    except IOError:
-        return phenny.reply("Can't connect to %s" % uri)
-
-    m = r_title.search(bytes)
-    if m:
-        title = m.group(1)
-        title = title.strip()
-        title = title.replace('\n', ' ')
-        title = title.replace('\r', ' ')
-        while '  ' in title:
-            title = title.replace('  ', ' ')
-        if len(title) > 100:
-            title = title[:100] + '[...]'
-
-        if title:
-            try: title.decode('utf-8')
-            except:
-                try: title = title.decode('iso-8859-1').encode('utf-8')
-                except: title = title.decode('cp1252').encode('utf-8')
-            else: pass
-        else: title = '[The title is empty.]'
-
-        title = title.replace('\n', '')
-        title = title.replace('\r', '')
-        return phenny.reply(title)
-    else: return phenny.reply('No title found')
-
-f_title.commands = ['title']
+        data = str(data, 'utf-8')
+    except UnicodeDecodeError:
+        return phenny.say("Doesn't seem to be HTML..")
+    m = re.search(r_title, data)
+    if not m:
+        return phenny.say("No title found.")
+    title = m.group(1).strip()
+    if len(title) > 75:
+        title = title[:75] + "[...]"
+    phenny.reply(title)
+
+title.commands = ['title']
 
 def noteuri(phenny, input):
-    uri = input.group(1).encode('utf-8')
+    uri = input.group(1)
    phenny.bot.last_seen_uri = uri
 
-noteuri.rule = r'.*(https?://[^<> "\x01]+).*'
+noteuri.rule = r'(https?://[^<> "\x01]+)'
 noteuri.priority = 'low'
 noteuri.nohook = True
 
 if __name__ == '__main__':
-    print __doc__.strip()
+    print(__doc__.strip())
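The loosened r_title pattern now tolerates whitespace inside the tag, e.g. "</title >". A quick standalone check, with an invented sample document:

import re

r_title = re.compile(r'(?ims)<\s*title[^>]*>(.*?)<\s*/\s*title\s*>')

sample = '<html><head><title>\n  Minetest Forums\n</title ></head></html>'
m = re.search(r_title, sample)
if m:
    title = m.group(1).strip()       # drop the surrounding newlines/indent
    if len(title) > 75:              # same cap as the new title() above
        title = title[:75] + '[...]'
    print(title)  # -> Minetest Forums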
--- a/wiki.py
+++ b/wiki.py
@@ -1,166 +1,56 @@
 #!/usr/bin/env python
 """
 wiki.py - Phenny Wiki Module
-Copyright 2008-9, Sean B. Palmer, inamidst.com
-Modified by sfan5 2013
-Licensed under the Eiffel Forum License 2.
-
-http://inamidst.com/phenny/
+Copyright 2014, sfan5
 """
 
-import re, urllib, gzip, StringIO
+import re
+import web
 
-wikiuri = 'http://wiki.minetest.net/%s'
-
-r_tr = re.compile(r'(?ims)<tr[^>]*>.*?</tr>')
-r_paragraph = re.compile(r'(?ims)<p[^>]*>.*?</p>|<li(?!n)[^>]*>.*?</li>')
-r_tag = re.compile(r'<(?!!)[^>]+>')
-r_whitespace = re.compile(r'[\t\r\n ]+')
-r_redirect = re.compile(
-    r'(?ims)class=.redirectText.>\s*<a\s*href=./wiki/([^"/]+)'
-)
-
-abbrs = ['etc', 'ca', 'cf', 'Co', 'Ltd', 'Inc', 'Mt', 'Mr', 'Mrs',
-         'Dr', 'Ms', 'Rev', 'Fr', 'St', 'Sgt', 'pron', 'approx', 'lit',
-         'syn', 'transl', 'sess', 'fl', 'Op', 'Dec', 'Brig', 'Gen'] \
-    + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') \
-    + list('abcdefghijklmnopqrstuvwxyz')
-t_sentence = r'^.{5,}?(?<!\b%s)(?:\.(?=[\[ ][A-Z0-9]|\Z)|\Z)'
-r_sentence = re.compile(t_sentence % r')(?<!\b'.join(abbrs))
-
-def unescape(s):
-    s = s.replace('&gt;', '>')
-    s = s.replace('&lt;', '<')
-    s = s.replace('&amp;', '&')
-    s = s.replace('&nbsp;', ' ')
-    return s
-
-def text(html):
-    html = r_tag.sub('', html)
-    html = r_whitespace.sub(' ', html)
-    return unescape(html).strip()
-
-def wikipedia(term, language='en', last=False):
-    global wikiuri
-    if not '%' in term:
-        if isinstance(term, unicode):
-            t = term.encode('utf-8')
-        else: t = term
-        q = urllib.quote(t)
-        u = wikiuri % (q)
-        bytes = web.get(u)
-    else: bytes = web.get(wikiuri % (term))
-
-    if bytes.startswith('\x1f\x8b\x08\x00\x00\x00\x00\x00'):
-        f = StringIO.StringIO(bytes)
-        f.seek(0)
-        gzip_file = gzip.GzipFile(fileobj=f)
-        bytes = gzip_file.read()
-        gzip_file.close()
-        f.close()
-
-    bytes = r_tr.sub('', bytes)
-
-    if not last:
-        r = r_redirect.search(bytes[:4096])
-        if r:
-            term = urllib.unquote(r.group(1))
-            return wikipedia(term, language=language, last=True)
-
-    paragraphs = r_paragraph.findall(bytes)
-
-    if not paragraphs:
-        if not last:
-            term = search(term)
-            return wikipedia(term, language=language, last=True)
-        return None
-
-    # Pre-process
-    paragraphs = [para for para in paragraphs
-                  if (para and 'technical limitations' not in para
-                      and 'window.showTocToggle' not in para
-                      and 'Deletion_policy' not in para
-                      and 'Template:AfD_footer' not in para
-                      and not (para.startswith('<p><i>') and
-                               para.endswith('</i></p>'))
-                      and not 'disambiguation)"' in para)
-                      and not '(images and media)' in para
-                      and not 'This article contains a' in para
-                      and not 'id="coordinates"' in para
-                      and not 'class="thumb' in para]
-#                     and not 'style="display:none"' in para]
-
-    for i, para in enumerate(paragraphs):
-        para = para.replace('<sup>', '|')
-        para = para.replace('</sup>', '|')
-        paragraphs[i] = text(para).strip()
-
-    # Post-process
-    paragraphs = [para for para in paragraphs if
-                  (para and not (para.endswith(':') and len(para) < 150))]
-
-    para = text(paragraphs[0])
-    m = r_sentence.match(para)
-
-    if not m:
-        if not last:
-            term = search(term)
-            return wikipedia(term, language=language, last=True)
-        return None
-    sentence = m.group(0)
-
-    maxlength = 275
-    if len(sentence) > maxlength:
-        sentence = sentence[:maxlength]
-        words = sentence[:-5].split(' ')
-        words.pop()
-        sentence = ' '.join(words) + ' [...]'
-
-    if (('using the Article Wizard if you wish' in sentence)
-            or ('or add a request for it' in sentence)
-            or ('in existing articles' in sentence)):
-        if not last:
-            term = search(term)
-            return wikipedia(term, language=language, last=True)
-        return None
-
-    sentence = '"' + sentence.replace('"', "'") + '"'
-    sentence = sentence.decode('utf-8').encode('utf-8')
-    wikiuri = wikiuri.decode('utf-8').encode('utf-8')
-    term = term.decode('utf-8').encode('utf-8')
-    return sentence + ' - ' + (wikiuri % (term))
-
-def wik(phenny, input):
-    origterm = input.groups()[1]
-    if not origterm:
-        return phenny.say('Perhaps you meant "!wik Zen"?')
-    origterm = origterm.encode('utf-8')
-
-    log.log("event", "%s queried Wiki for '%s'" % (log.fmt_user(input), origterm), phenny)
-
-    term = urllib.unquote(origterm)
-    language = 'en'
-    if term.startswith(':') and (' ' in term):
-        a, b = term.split(' ', 1)
-        a = a.lstrip(':')
-        if a.isalpha():
-            language, term = a, b
-    term = term[0].upper() + term[1:]
-    term = term.replace(' ', '_')
-
-    try: result = wikipedia(term, language)
-    except IOError:
-        args = (language, wikiuri % (term))
-        error = "Can't connect to wiki.minetest.com (%s)" % args
-        return phenny.say(error)
-
-    if result is not None:
-        phenny.say(result)
-    else: phenny.say('Can\'t find anything in Wiki for "%s".' % origterm)
-
-wik.commands = ['wik', 'wiki']
-wik.priority = 'high'
+import urllib.parse
+
+wikiuri_g = 'http://wiki.minetest.net/%s?printable=yes'
+wikiuri_r = 'http://wiki.minetest.net/%s'
+
+r_content = re.compile(r'(?i)<div[^>]+class=.mw-content-ltr.>')
+r_paragraph = re.compile(r'(?ims)<p>(.+?)</p>')
+r_sentenceend = re.compile(r'\.[^\.]')
+transforms = [
+    re.compile(r'(?i)<a [^>]+>(.+?)</a>'),
+    re.compile(r'(?i)<b>(.+?)</b>'),
+    re.compile(r'(?i)<i>(.+?)</i>'),