#!/usr/bin/env python2.5
"""SpamCop Denouncer v0.2 (SCD)

Computer aided easy spam reporting.

Daniele (mythsmith) Paganelli @ 2008
Public Domain
http://daniele.modena1.it/code/spamcop-denouncer
"""

#####################
# EDIT THESE SETTINGS
#####################

# SpamCop settings
user_name='XXXX'  # spamcop.net login information
password='XXXX'
max_age=12  # Accept only messages more recent than max_age (in hours). Spamcop.net internal maximum is 12h.
notes=''  # Standard optional notes sent to every recipient of your report (like a signature). You can change it for every message.

# ALARM: alert when one of these domains is found as spam source (spamvertised) or in report recipients
alarm=['modena1','agescimo','homelinux.org','seeweb']

# Special paths
status_file='/home/daniele/.spamcop.dat'  # Where SCD stores the cookie and id cache.
tmp='/tmp/denounce'  # Temporary file for downloaded mail
tmpdir='/tmp'  # Temp dir where to store cached reports
read_file='/var/mail/cop'  # Default file to scan for spamcop urls
browser='less '  # Command to view the detailed spam report in textual form
box='/home/daniele/.kde/share/apps/kmail/mail/Spambox'

# Tweaks
width=100  # Dialog width (adapt to your terminal)
idcache=500  # Max number of IDs and hashes to remember in the status_file
delay=7  # Delay in paste mode for non-paying reporters

# Optional email settings for fetchmail (-f)
protocol='IMAP'  # 'IMAP' or 'POP3'
ssl=True
IMAP_user=''
IMAP_server=''
IMAP_folder=''
POP3_user=''
POP3_server=''

# END OF SETTINGS
#------------------

# Default switches (warning: these will modify the behaviour of command line switches!)
keep=True safe=True fetch=False read=False clearcache=False debug=False idmode=False paste=False mbox=False unreported=False queue=False ############## # PROGRAM ############## from httplib import HTTPConnection,HTTP from sys import argv,exit from urllib import urlencode,unquote_plus from commands import getstatusoutput as go from os import popen,remove,system,environ from subprocess import Popen,PIPE from sys import exit,stdin,stdout,stderr from optparse import OptionParser from cStringIO import StringIO from formatter import AbstractFormatter,DumbWriter from htmllib import HTMLParser from time import sleep,strftime,time import hashlib version='0.2' help=""" SpamCop Denouncer v%s http://daniele.modena1.it/code/spamcop-denouncer ---------------------- -h --help Print this help -v verbose SPAM SOURCE OPTIONS: -i --id= Process comma-separed SpamCop IDs -q --queue Process all your unreported spam at spamcop.net queue (very slow) -f --fetch Download new email to a file and get SpamCop IDs from it. -r --read= Read from file instead of downloading/pasting -p --paste Read file is standard input -m --mbox Read an mbox of spam messages and report them via the spamcop web interface. REPORTING OPTIONS: -n --notes= Send within each report -s --safe Safe reporting (default) -u --unsafe VERY DANGEROUS - Automated reporting EXIT OPTIONS: --no-keep --nk -d Don't keep mail/read file/mbox -k --keep Keep downloaded mail/read file/mbox after reporting (default) -c --clear Clear the SpamCop ID and hash cache (forget about already processed IDs) STATISTICS: -S --stats= Print statistics collected since the first run date, then exit. It will print some global stats, the reporting activity by -days periods, the top report recipent's domains. The most significant data are the SHI cost estimations. They mean "Seconds of Human Interaction", calculated from the _response_ to the first spam analysis prompt (dialog) till the end of the analysis. 
SHI values resume the time wasted due to the reporting activity. MUTUALLY EXCLUSIVE OPTIONS: These options cannot be set at the same time: fetch, read, idmode / f, r, i fetch, mbox, idmode / f, m, i fetch, read, paste / f, r, p keep, id / k, i keep, paste / k, p EXAMPLES: Using: --paste --mbox. , you can paste your spam message to stdin and report it as you would do with the web form. NOTICE: If any =. the defaults will be used. Example: spamcop -r. , will use the read file option hardcoded in the program header. """ %version environ["DIALOGOPTS"]='--single-quoted --colors --backtitle "SpamCop Denouncer v%s ' % version ua='User-Agent',"SpamCop Denouncer/%s (+http://daniele.modena1.it/code/spamcop-denouncer)" % version # If this string is found in a spamcop page, it means the cookie-login has expired: relogin=('\n\n', '
No userid found, sorry.
') submit_form={'action':'submit','oldverbose':'0','spam':None,'x1':'Process Spam','verbose':'1'} htime=None hash_cache=idcache # Max number of spam hashes to remember # add this string if a message is empty (but has headers) bodyless='This spam email did not have a body. This string was added for reporting purposes' ### Utilities def f(s,si,se): """Get a slice of s which begins with si and ends with se""" i=s.find(si)+len(si) if i<0: return False e=s.find(se,i) if e<0: return False return s[i:e] def multipart(fields): """From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306""" BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$' CRLF = '\r\n' L = [] for key in fields.keys(): L.append('--' + BOUNDARY) L.append('Content-Disposition: form-data; name="%s"' % key) L.append('Content-Type: text/plain') L.append('') L.append(fields[key]) L.append('--' + BOUNDARY + '--') L.append('') body = CRLF.join(L) content_type = 'multipart/form-data; boundary=%s' % BOUNDARY return content_type, body def htdecode(a): """Simple html 2 text. 
Thanks to Michel Claveau: http://www.thescripts.com/forum/post86944-4.html""" f=StringIO() z=AbstractFormatter(DumbWriter (f)) p=HTMLParser(z) p.feed(unquote_plus(a)) p.close() sret=f.getvalue() f.close() return(sret) # Thanks to Daniel Yoo, http://mail.python.org/pipermail/python-list/2004-September/284991.html def backfileiter(myfile): """Iterates the lines of a file in reverse order.""" myfile.seek(0) offsets = _getLineOffsets(myfile) myfile.seek(0) offsets.reverse() for i in offsets: myfile.seek(i+1) yield myfile.readline() def _getLineOffsets(myfile): """Return a list of offsets where newlines are located.""" offsets = [-1] i = 0 while True: byte = myfile.read(1) if not byte: break elif byte == '\n': offsets.append(i) i += 1 return offsets ### def post(act,params,cl=True): """Cookie aware POST""" #body=urlencode(params) content_type,body=multipart(params) conn=HTTP('www.spamcop.net') conn.putrequest('POST', act) conn.putheader('Host', 'www.spamcop.net') #conn.putheader("Content-type", "application/x-www-form-urlencoded") conn.putheader("Content-type", content_type) if cl: conn.putheader('Cookie', cookie ) conn.putheader("Content-length", "%d" % len(body)) conn.putheader(ua[0],ua[1]) conn.endheaders() conn.send(body) reply, msg, hdrs = conn.getreply() page = conn.getfile().read() for logerr in relogin: if logerr in page: print 'Cookie expired.' login() post(act,params,cl) conn.close() return reply,msg,hdrs,page def login(): """Get a new cookielogin""" global cookie print 'Requesting a new authentication...' 
params={'username':user_name,'password':password,'duration':'+1y','action':'cookielogin','returnurl':'/'} reply,msg,hdrs,page=post('/mcgi',params,cl=False) cookie=hdrs['Set-Cookie'].split(';')[0] return reply,msg,hdrs,page def get(act): """Cookie aware GET""" conn=HTTP('www.spamcop.net') conn.putrequest('GET', act) conn.putheader('Host', 'www.spamcop.net') conn.putheader('Cookie', cookie ) conn.putheader(ua[0],ua[1]) conn.endheaders() reply, msg, hdrs = conn.getreply() page = conn.getfile().read() for logerr in relogin: if logerr in page: print 'Cookie expired.' login() get(act) conn.close() return reply,msg,hdrs,page meantime=0 def parameters(page): """Parse the reporting page to find out which are the reporting options""" global unreported,meantime cancel=False if 'Please wait - subscribe to remove this delay' in page: return (2,{},'delayed') if 'Reports regarding this spam have already been sent:' in page: return (1,{},'report already sent') if '
No body text provided, check format of submission.' in page: return (1,{},'body text not found, check format of submission') if '
ISP has indicated spam will cease; ISP resolved this issue' in page: return (1,{},'ISP has indicated spam will cease. Report cancelled.') old=f(page,'Message is','hours old') try: old=int(old.replace(' ','')) except: print '_/!\\_ Error parsing spam age. Continuing...' old=-1 if old>max_age: cancel=True olds='' if old>=0 and old<=max_age: olds='Message is %i hours old' %old if 'Sorry, this email is too old to file a spam report.' in page: return (1,{},'this email is too old to file a spam report. '+olds) if 'Yum, this spam is fresh!' in page: print '\tYum, this spam is fresh! '+olds else: print '\t'+olds if old=0: meantime+=old if not unreported: if "Unreported Spam Saved" in page: unreported=True params={} assure=f(page,'Please make sure this email IS spam:','') if page==False: return (1,{},'input form not found! Please check the ID manually.') page=page.split('')+len('
') e=page.find('

',i) page=page[i:e].replace('
','').replace('

','_/!\\_ Error: ').replace('
','') if 'Report canceled.' in page: page='\t Report cancelled.' return page def fetchmail(): print 'Fetching new mail to %s' %read_file exec('u=%s_user' %protocol) exec('srv=%s_server' %protocol) plus='' if protocol=='IMAP' and IMAP_folder: plus='--folder '+IMAP_folder if ssl: plus+=' --ssl ' cmd="fetchmail -Uk -p %s --username %s %s --bsmtp %s %s" % (protocol,u,plus,read_file,srv) system(cmd) def grep_urls(f): f=open(f,'r') urls=[] skip=0 lc=0 for line in backfileiter(f): lc+=1 if line[:29]=="http://www.spamcop.net/sc?id=": rid=line.split('=')[1].replace('\n','') if rid not in cache and rid not in urls: urls+=[rid] else: skip+=1 if skip>=idcache: print '\nIDCache limit of %i reached at line -%i' %(idcache,lc) break if skip>0: print '\nIgnored %i SpamCop IDs already in cache.' %skip f.close() return urls submitted=0 def submit(email): """Submits to spamcop webform and returns rid, page, params""" global submitted submit_form['spam']=email reply,msg,hdrs,page=post('/sc',submit_form) if reply!=302: print '_/!\\_ Submit error: ',reply,msg return False,'Connection Error' submit_form['spam']=None rid=hdrs['location'].split('=')[1] shash=hashlib.md5(email).hexdigest() if shash not in hache: submitted+=1 print '%i Submit hash: %s Got ID: %s' %(submitted,shash,rid) return rid,shash else: print '_/!\\_ Already submitted: the message is in hash cache with ID %s' % cache[hache.index(shash)] return False,'Hash in cache' def queue_next(): reply,msg,hdrs,page=get('/') if msg!='OK': print '_/!\\_Connection error: %s, %s' %(reply,msg) return 1 if "Unreported Spam Saved" not in page: print 'End queue' return False rid=f(page,'') return rid cache=[] hache=[] stats=[] cookie='code=a' def database(): """Read cookie, id cache, hash cache and statistics""" global cookie,cache,hache,stats try: status=open(status_file,'r').read().splitlines() cookie=status[0] h=0 if len(status)>3 and not clearcache: for l in status[2:]: if l in ['---HASH---']: h=1 continue if l in ['---STATS---']: h=2 
continue if h==0: cache+=[l] elif h==1: hache+=[l] elif h==2: stats+=[l] except: cookie='code=a' def print_stats(each,top): """Output some statistics""" collect={}; dstats={} meanP=0; tP=0 meanS=0; tS=0 meanT=0; hcost=0.; mcost=0. for s in stats: # Aggregate stats sv=s.replace('\t','').replace(' ','').split(';') day=sv[1].split('/') day=int(day[0])*30+int(day[1])+int(day[2])*365 day=day//each if day not in collect.keys(): collect[day]=[sv[1],0,0,0,0.,0] collect[day][1]+=int(sv[2]) tP+=int(sv[2]) collect[day][2]+=int(sv[4]) tS+=int(sv[4]) collect[day][3]+=1 collect[day][4]+=float(sv[6]) hcost+=float(sv[6]) meantime=int(sv[7]) meanT+=meantime collect[day][5]+=meantime for ndom in sv[8].split(':'): # Aggregate domain stats if '@' not in ndom: continue [n,dom]=ndom.split('@') if dom not in dstats.keys(): dstats[dom]=0 dstats[dom]+=int(n) mcost=hcost/tS meanT=1.*meanT/tS meanP=1.*tP/len(collect.keys()) meanS=1.*tS/len(collect.keys()) sday=collect.keys() sday.sort() pe='days' if each==1: pe='day' print '\nSTATISTICS since %s (%i %s period):' % (collect[sday[0]][0],each,pe) print 'Reporting quality: %7.3fh of mean spam age' % meanT print 'Total time cost: %6.1fshi, efficiency: %6.1fs/spam'% (hcost,mcost) print 'Processed: %i, %5.1f/period - Reported: %i, %5.1f/period' % (tP,meanP,tS,meanS) print '\nReporting activity (%i %s period):' % (each,pe) print '%-8s %-9s %-8s %-8s %-6s %-7s' % ('day','processed','reported','sessions','cost','quality') for day in sday: d=collect[day] d[-1]=1.*d[-1]/d[2] print '%-8s %-9i %-8i %-8i %6.2f %6.2f' % tuple(d) print '\nOverall top %i report destinations' % top dk=dstats.keys() dk.sort(lambda x,y: dstats[y]-dstats[x]) rep=len(dk) if rep>top: rep=top for i in range(rep): print '%-25s %i' % (dk[i],dstats[dk[i]]) print ##################################################### # Process Reports=[] OkReports=[] c=0 def process_urls(urls): global Reports,c,OkReports tot=len(urls) print '\nProcessing %i spamcop report links:' % tot for rid in urls: 
c+=1 # url from input url='/sc?id='+rid print '\n[%.1f%%] Processing SpamCop ID: %s' % (100.*c/tot,rid) # get reporting page status=2 while status==2: reply,msg,hdrs,page=get(url) if msg!='OK': print '_/!\\_ Connection Error: %s %s' % (reply,msg) continue # extract parameters status,params,sm=parameters(page) if status==2: print 'Delayed %i seconds' %delay sleep(delay) if status==1: print '_/!\\_ Parsing error: ',sm continue elif status==3: print '_/!\\_ Removing: ',sm OkReports+=[(rid,params)] continue tmpage='%s/spamcop-%s.txt' % (tmpdir,rid) page=htdecode(page) open(tmpage,'w').write(page) Reports+=[(rid,params)] ##################################################### # Analysis cr=0 def analysis(Reports): global OkReports,tot,cr,htime rdict={} tot=len(Reports) print '\nAnalyzing %i spam-reports:' % tot i=0 while i1: msg.set_payload(bodyless) rid,shash=submit(msg.as_string()) if rid: urls+=[rid] hache+=[shash] process_urls(urls) if safe: analysis(Reports) else: cr=len(Reports) OkReports=Reports if not queue: commit(OkReports) # Update caches cache=urls+cache l=len(cache) if l>idcache: cache=cache[ : (l-idcache)] l=len(hache) if l>hash_cache: hache=hache[(l-hash_cache):] sdstat='' for d in dstats.keys(): sdstat+='%i@%s:' % (dstats[d],d) if c!=0: stats+=['%s\t; %i\t; %i\t; %i\t; %i\t; %6.2f\t; %i\t; %s' % (strftime('%X ; %x'),c,cr,cs,canc,htime,meantime,sdstat[:-1])] # Commit database dat=open(status_file,'w') dat.write(cookie+'\n---ID---\n') for rid in cache: dat.write(rid+'\n') dat.write('---HASH---\n') for shash in hache: dat.write(shash+'\n') dat.write('---STATS---\n') for s in stats: dat.write(s+'\n') dat.close() # Print session stats if htime==None: htime=0. 
print """Done: %i emails processed %i emails analyzed %i reports sent %i reports cancelled Time cost: %6.2fshi""" % (c,cr,cs,canc,htime) # Cleanup if not keep+idmode+paste+queue+keep+mbox: remove(read_file) print 'File: %s, removed' %read_file if mbox and not keep: open(box,'w').write('') print 'Mailbox: %s, cleared' %box