import re
# Matches runs of ASCII letters; every other character acts as a word separator.
wordListPattern = re.compile('[a-z]+', re.IGNORECASE)

# Default input files (the locations hard-coded by the original script).
QURAN_PATH = "C:\\Users\\Omar\\Desktop\\QuranEnglishUS\\superquran.txt"
BIBLE_PATH = "C:\\Users\\Omar\\Desktop\\webtxt\\superbible.txt"
COMMON_PATH = "C:\\Users\\Omar\\Desktop\\commonword1000.txt"


def _read_word_set(path):
    """Return the set of distinct lower-cased words found in the file at *path*.

    Each line is lower-cased and split into words with ``wordListPattern``.
    The file is opened in text mode with the platform's default encoding and
    is closed even if reading fails part-way through.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    words = set()
    with open(path) as text_file:
        # Stream line by line; only the distinct words are kept in memory.
        for line in text_file:
            words.update(wordListPattern.findall(line.lower()))
    return words


def main(quran_path=QURAN_PATH, bible_path=BIBLE_PATH, common_path=COMMON_PATH):
    """Print the words shared by the Quran and Bible texts, minus common words.

    Reads the three text files, prints a progress message after each one,
    then prints a ``---cleaned----`` header followed by every word that
    occurs in both the Quran file and the Bible file but not in the
    common-word file, one word per line in sorted order.

    Args:
        quran_path: path of the Quran text file.
        bible_path: path of the Bible text file.
        common_path: path of the file listing common words to exclude.

    Returns:
        The sorted list of words that was printed.

    Raises:
        OSError: if any of the files cannot be opened or read.
    """
    quranWordSet = _read_word_set(quran_path)
    print('end quran dump')

    bibleWordSet = _read_word_set(bible_path)
    print('end bible dump')

    commonWordSet = _read_word_set(common_path)
    print('end common dump')

    # Words present in both texts, with the common words filtered out.
    intersectionWordSet = quranWordSet.intersection(bibleWordSet)
    differenceWordSet = intersectionWordSet.difference(commonWordSet)
    sortedWordSet = sorted(differenceWordSet)

    print('---cleaned----')
    for word in sortedWordSet:
        print(word)
    return sortedWordSet


if __name__ == "__main__":
    main()
# Sunday, March 18, 2012
# Read the Quran text file, compare it with the Bible, and remove common words
# Subscribe to:
# Post Comments (Atom)
# No comments:
# Post a Comment