-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdistancelist.py
81 lines (66 loc) · 1.92 KB
/
distancelist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Computes, for each word in a text, the distances (in tokens) between its
# consecutive occurrences.
# Feature: word distance
def addword(dict, word):
    """Ensure ``word`` is a key of ``dict``.

    A word that is not yet present is registered with a fresh empty list;
    an existing entry is left untouched. The mapping is modified in place
    and nothing is returned.
    """
    dict.setdefault(word, [])
def finddistance(word, index, list):
    """Return the gap between ``index`` and the next occurrence of ``word``.

    The search covers ``list`` from position ``index + 1`` onwards.

    Args:
        word: the token to look for.
        index: the position to search after.
        list: the token sequence; it must provide ``index(value, start)``.

    Returns:
        ``found_position - index`` when ``word`` occurs again after
        ``index``, or 0 when there is no later occurrence.
    """
    try:
        following = list.index(word, index + 1)
    except ValueError:
        # list.index raises ValueError when the value is not found; only that
        # case means "no further occurrence". Any other error (for example a
        # wrong argument type) is a real bug and is allowed to propagate.
        return 0
    return following - index
def adddistance(distance, dict, word):
    """Record a non-zero ``distance`` in the list stored under ``dict[word]``.

    The entry for ``word`` is looked up first, so a missing key raises
    ``KeyError`` even when ``distance`` is 0. A distance of 0 is not
    recorded. The list is modified in place and nothing is returned.
    """
    recorded = dict[word]
    if distance == 0:
        return
    recorded.append(distance)
def Distance(file):  # file is a string
    """Map every word of ``file`` to the gaps between its occurrences.

    ``file`` is split on whitespace. Each distinct word becomes a key whose
    value lists, in order of appearance, the number of token positions
    between each occurrence of the word and the next one. A word that occurs
    only once maps to an empty list.

    Args:
        file: the text to analyse, as a single string.

    Returns:
        A dict of ``word -> list of int`` gaps.
    """
    gaps = {}
    last_seen = {}  # word -> position of its most recent occurrence
    # A single pass that remembers the previous position of each word gives
    # the same gaps as searching forward from every token, without rescanning
    # the rest of the text for each one.
    for position, word in enumerate(file.split()):
        if word in last_seen:
            gaps[word].append(position - last_seen[word])
        else:
            gaps[word] = []
        last_seen[word] = position
    return gaps
def fun(mails, arrK):  # mails is list of mails(string), arrk is array of dict of every mail
    """Build one ``[mailno, key, feature]`` row per word occurrence.

    For the mail at position ``n`` in ``mails``, every whitespace-separated
    word yields a row made of ``n``, the value stored for that word in
    ``arrK[n]``, and the word's distance list as computed by ``Distance``.
    Repeated words produce repeated rows, and those rows reference the same
    distance list object.

    Args:
        mails: list of mail texts (strings).
        arrK: sequence indexed by mail number; each element maps a word to
            its key.

    Returns:
        The list of rows, in mail order and then word order.
    """
    rows = []
    for mail_no, mail in enumerate(mails):
        tokens = mail.split()
        distances = Distance(mail)
        # arrK[mail_no] is looked up per word, so a mail without words never
        # touches arrK.
        rows.extend(
            [mail_no, arrK[mail_no][word], distances[word]] for word in tokens
        )
    return rows  # rows of the form:- [mailno,key,feature]
'''
def id_words(all_text):
mail = 0
iden = 0
ar = []
for mail in all_text:
words = [word for word in mail.split()]
words.sort()
dic = {}
for word in words:
if word not in dic.keys():
dic.update({word: iden})
iden = iden + 1
ar.append(dic)
return ar
text = ["text hai ki Anup chUtiys Hai hai Anup ki text hai hai hai chUtiya gandu gandu", "anup bhosDika Madarchod phir se anup loda bhosDika bina LunD KA also also", "CHUT pHati KA hai anup KA CHUT pHati" ]
ar = id_words(text)
print ar
print fun(text,ar)
'''