#yueStat3.py
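# Character-frequency statistics for a yue.wikipedia page: count how often each
# character occurs in one page and write the ranking to a sandbox page.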
import re
import wikipedia
# Page to be read; page to be written; extra text appended to the edit summary.
pagename = u'クイズタイムショック'
sandname = u'Wikipedia:沙盒5'
comment = u''

# Characters that are shown with a leading backslash in the log and the report.
# The set is exactly the one the earlier regex-based counter escaped; the
# prefix is kept so that the published output format does not change.
BACKSLASHED_CHARS = frozenset(u'.^$*+?{}[]()!\\#<>-|')


def compareFreq(a, b):
    """cmp-style comparator ordering [token, count] pairs by descending count.

    Retained for backward compatibility; the ranking itself now uses a key
    function (see rank_by_frequency).
    """
    return b[1] - a[1]


def report_token(char):
    """Return char as displayed: backslash-prefixed if in BACKSLASHED_CHARS."""
    if char in BACKSLASHED_CHARS:
        return u'\\' + char
    return char


def count_characters(text):
    """Count the occurrences of every distinct character in text.

    Returns a list of [char, count] pairs in order of first appearance.
    An empty text yields an empty list.
    """
    counts = {}
    first_seen = []
    for char in text:
        if char in counts:
            counts[char] += 1
        else:
            counts[char] = 1
            first_seen.append(char)
    return [[char, counts[char]] for char in first_seen]


def rank_by_frequency(pairs):
    """Return pairs sorted by descending count.

    The sort is stable, so pairs with equal counts keep their relative order.
    """
    return sorted(pairs, key=lambda pair: -pair[1])


def build_report(title, pairs):
    """Build the wiki text: a header line, then one "token,count" line per pair.

    title is the name of the analysed page; pairs is a list of [char, count].
    """
    # NOTE(review): the header prints the literal text "(<count>+1)" although
    # <count> is already the number of distinct characters -- confirm whether
    # the "+1" is intended. Kept as-is to preserve the published wording.
    # The header is a normal (non-raw) literal so that "\n" is a real newline;
    # a raw literal would put a backslash and an "n" on the page.
    parts = [u'[[' + title + u']]用咗(%d' % len(pairs)
             + u'+1)隻字同符號。 -~~~~\n[[category:維基百科統計]]\n\n']
    for char, num in pairs:
        parts.append(report_token(char) + u',%d\n' % num)
    return u''.join(parts)


def main():
    """Read pagename, count its characters and save the ranking to sandname."""
    try:
        site = wikipedia.getSite()
        # Open the page to analyse.
        source_page = wikipedia.Page(site, pagename)
        text = source_page.get()
        wikipedia.output('...sandbox opened')
        # Open the page that receives the report.
        target_page = wikipedia.Page(site, sandname)
        wikipedia.output('...sandbox1 opened')

        pairs = count_characters(text)
        # Progress log: one entry per distinct character, in first-seen order.
        for index, (char, num) in enumerate(pairs):
            print('%d' % index)
            wikipedia.output(char)
            wikipedia.output(report_token(char) + u" occured")
            print('%d times' % num)

        report = build_report(pagename, rank_by_frequency(pairs))
        target_page.put(
            report,
            u'機械人:[[' + pagename
            + u']]統計 - [[user:R. Hillgentleman/yueStat.py]]' + comment)
    finally:
        # Always called, even when reading or saving the page fails.
        wikipedia.stopme()


if __name__ == "__main__":
    main()
#########################################
# SOME COMMENTED OUT CRAP
#
#ge = re.compile(ur'嘅') # or ur'\u5605'
#br = re.compile(r'\{\{')
#bl = re.compile(r'\}\}')
#newstr , n = ge.subn('',text) # replace every ur'嘅' by empty string
#newstr1 , n1= br.subn('',newstr)
#newstr2 , n2= bl.subn('',newstr1)
#wikipedia.output( 'the number of of GE in sandbox is: ')
#print n
#print ('numbers of {{,}}in sandbox are:')
#print n1 , n2
#wikipedia.stopme()
############################################