diff options
author | diSabler <dissy@ya.ru> | 2014-01-18 21:46:52 +0400 |
---|---|---|
committer | diSabler <dissy@ya.ru> | 2014-01-18 21:46:52 +0400 |
commit | 44de416b457e7868468c307d55c1b6b8af1c26b2 (patch) | |
tree | b4016a2747c5901dd4556f48a3c7718598cf1c06 | |
parent | 4f8beae698e2be97f3dd37c60b188f1f1152e8d1 (diff) |
add: optimization in feeds
WARNING! Before updating, read http://isida-bot.com/wiki/update
-rw-r--r-- | plugins/main.py | 65 | ||||
-rw-r--r-- | scripts/mysql.schema | 3 | ||||
-rw-r--r-- | scripts/pgsql.schema | 3 | ||||
-rw-r--r-- | scripts/sqlite3.schema | 3 |
4 files changed, 45 insertions, 29 deletions
diff --git a/plugins/main.py b/plugins/main.py index 708ec86..b68bd6e 100644 --- a/plugins/main.py +++ b/plugins/main.py @@ -141,7 +141,7 @@ def get_size_human(mt): mt = mt / 1024.0 if mt < 1024: break return '%.2f%s' % (mt,t) - + def is_owner(jid): return cur_execute_fetchone('select * from bot_owner where jid=%s',(getRoom(jid),)) != None def validate_nick(nick,count): @@ -546,7 +546,6 @@ def un_unix(*val): if len(val) == 2: rn = val[1] else: rn = '' val = val[0] - tt = map(lambda q,a: q-a, time.gmtime(val), time.gmtime(0))[:6] ret = '%02d:%02d:%02d' % tuple(tt[3:6]) if sum(tt[:3]): @@ -1318,7 +1317,7 @@ def rss(type, jid, nick, text): except: ofset = 4 if timetype == 'm' and ofset < GT('rss_min_time_limit'): timetype = '%sm' % GT('rss_min_time_limit') else: timetype = str(ofset)+timetype - cur_execute('insert into feed values (%s,%s,%s,%s,%s,%s);',(link, timetype, text[3], int(time.time()), getRoom(jid),[] if base_type == 'pgsql' else '[]')) + cur_execute('insert into feed values (%s,%s,%s,%s,%s,%s,%s);',(link, timetype, text[3], int(time.time()), getRoom(jid),[] if base_type == 'pgsql' else '[]','')) msg = L('Add feed to schedule: %s (%s) %s','%s/%s'%(jid,nick)) % (link,timetype,text[3]) rss(type, jid, nick, 'get %s 1 %s' % (link,text[3])) elif mode == 'del': @@ -1332,26 +1331,38 @@ def rss(type, jid, nick, text): elif mode in ['new','get']: link = text[1] if not re.findall('^http(s?)://',link[:10]): link = 'http://%s' % link - try: - req = urllib2.Request(link.encode('utf-8')) - req.add_header('User-Agent',GT('user_agent')) - feed = urllib2.urlopen(url=req,timeout=GT('rss_get_timeout')).read(GT('size_overflow')) - except: feed = L('Unable to access server!','%s/%s'%(jid,nick)) - is_rss_aton,fc = 0,feed[:256] - if '<?xml version=' in fc: - if '<feed' in fc: - is_rss_aton = 2 - t_feed = feed.split('<title>') - feed = t_feed[0] - for tmp in t_feed[1:]: - tm = tmp.split('</title>',1) - if ord(tm[0][-1]) == 208: tm[0] = tm[0][:-1] + '...' 
- feed += '<title>%s</title>%s' % tuple(tm) - elif '<rss' in fc or '<rdf' in fc: is_rss_aton = 1 - feed = html_encode(feed) - feed = re.sub('(<span.*?>.*?</span>)','',feed) - feed = re.sub('(<div.*?>)','',feed) - feed = re.sub('(</div>)','',feed) + body, result = get_opener(enidna(link)) + modified,need_update_feed = '',True + if result: + modified = body.headers.get('last-modified',None) + if not modified: modified = get_tag(load_page_size(link, 1024),'updated') + if modified: + changed = cur_execute_fetchone('select changed from feed where room=%s and url=%s;',(jid,link))[0] + if modified == changed: need_update_feed = False + else: cur_execute('update feed set changed=%s where room=%s and url=%s',(modified,jid,link)) + is_rss_aton = 0 + if need_update_feed: + try: + req = urllib2.Request(link.encode('utf-8')) + req.add_header('User-Agent',GT('user_agent')) + feed = urllib2.urlopen(url=req,timeout=GT('rss_get_timeout')).read(GT('size_overflow')) + except: feed = L('Unable to access server!','%s/%s'%(jid,nick)) + fc = feed[:256] + if '<?xml version=' in fc: + if '<feed' in fc: + is_rss_aton = 2 + t_feed = feed.split('<title>') + feed = t_feed[0] + for tmp in t_feed[1:]: + tm = tmp.split('</title>',1) + if ord(tm[0][-1]) == 208: tm[0] = tm[0][:-1] + '...' 
+ feed += '<title>%s</title>%s' % tuple(tm) + elif '<rss' in fc or '<rdf' in fc: is_rss_aton = 1 + feed = html_encode(feed) + feed = re.sub('(<span.*?>.*?</span>)','',feed) + feed = re.sub('(<div.*?>)','',feed) + feed = re.sub('(</div>)','',feed) + else: feed = L('New feeds not found!','%s/%s'%(jid,nick)) if is_rss_aton and feed != L('Encoding error!','%s/%s'%(jid,nick)) and feed != L('Unable to access server!','%s/%s'%(jid,nick)): if is_rss_aton == 1: if '<item>' in feed: fd = feed.split('<item>') @@ -1436,9 +1447,11 @@ def rss(type, jid, nick, text): rss_flush(jid,link,None) if text[4] == 'silent': nosend = True else: - if feed in [L('Encoding error!','%s/%s'%(jid,nick)),L('Unable to access server!','%s/%s'%(jid,nick))]: title = feed - else: title = html_encode(get_tag(feed,'title')) - msg = L('Bad url or rss/atom not found at %s - %s','%s/%s'%(jid,nick)) % (link,title) + if need_update_feed: + if feed in [L('Encoding error!','%s/%s'%(jid,nick)),L('Unable to access server!','%s/%s'%(jid,nick))]: title = feed + else: title = html_encode(get_tag(feed,'title')) + msg = L('Bad url or rss/atom not found at %s - %s','%s/%s'%(jid,nick)) % (link,title) + else: msg = feed if not nosend: send_msg(type, jid, nick, msg) def configure(type, jid, nick, text): diff --git a/scripts/mysql.schema b/scripts/mysql.schema index a7deb93..6c6a3b1 100644 --- a/scripts/mysql.schema +++ b/scripts/mysql.schema @@ -370,7 +370,8 @@ CREATE TABLE feed ( type text, time integer, room text, - hash text + hash text, + changed text ); CREATE INDEX feed_r ON feed (room(64)); diff --git a/scripts/pgsql.schema b/scripts/pgsql.schema index 89e7ae8..4340b66 100644 --- a/scripts/pgsql.schema +++ b/scripts/pgsql.schema @@ -363,7 +363,8 @@ CREATE TABLE feed ( type text, time integer, room text, - hash text array[10] + hash text array[10], + changed text ); CREATE INDEX feed_r ON feed (room); diff --git a/scripts/sqlite3.schema b/scripts/sqlite3.schema index 6858b4b..0280841 100644 --- 
a/scripts/sqlite3.schema +++ b/scripts/sqlite3.schema @@ -363,7 +363,8 @@ CREATE TABLE feed ( type text, time integer, room text, - hash text + hash text, + changed text ); CREATE INDEX feed_r ON feed (room); |