This repository has been archived by the owner on Dec 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathFile.scala
143 lines (106 loc) · 3.35 KB
/
File.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// File.scala
// This is the data structure of the input files
package IO
import scala.collection.mutable.ArrayBuffer
/** The abstract file: the table-like interface that input files expose.
Cells are Strings, rows are ArrayBuffer[String], and rows are addressed by Int index.
The File class in this file is the concrete implementation.
**/
trait AbstractFile {
// The cells of row i.
def row(i: Int) : ArrayBuffer[String]
// Number of rows.
def rowCount : Int
// The column names.
def columnNames : ArrayBuffer[String]
// Number of columns.
def columnCount : Int
// Indices of the rows in which the value s occurs.
def rowsContaining(s: String): Array[Int]
// Compares row i of this file with the given row and returns a similarity score.
def equal(i: Int, row: ArrayBuffer[String]): Double // whether row i in this file is the same as the given row, expressed as how similar they are
}
/** A delimited text file loaded eagerly into memory.
  *
  * The first line of the file is taken as the header (the column names); every
  * following line is one data row. Fields are expected to be wrapped in double
  * quotes and separated by the delimiter, e.g. "a","b","c". An empty file
  * yields no columns and no rows.
  *
  * Use `new File(location)` to get the default "," delimiter.
  *
  * @param initLocation path of the file to read
  * @param delimiter    separator that appears between the quoted fields. It is
  *                     embedded in the pattern handed to `String.split`, so it
  *                     is interpreted as a regular expression: escape
  *                     metacharacters such as `|` or `.` before passing them.
  */
class File(initLocation: String, delimiter: String) extends AbstractFile {

  /** Pattern matching the closing quote, the delimiter and the opening quote between two fields. */
  private val fieldSeparator: String = "\"" + delimiter + "\""

  private val columns: ArrayBuffer[String] = new ArrayBuffer[String]()
  private val rows: ArrayBuffer[ArrayBuffer[String]] = new ArrayBuffer[ArrayBuffer[String]]()

  // Must run after the fields above are initialised, because it fills them.
  loadFromSource()

  /** Set default CSV delimiter to ",". */
  def this(i: String) = this(i, ",")

  /** Reads the header and all data rows, always closing the underlying file handle. */
  private def loadFromSource(): Unit = {
    val source = scala.io.Source.fromFile(initLocation)
    try {
      val lines = source.getLines()
      // An empty file has no header line; leave columns and rows empty instead of failing.
      if (lines.hasNext) {
        columns ++= parseLine(lines.next())
        lines.foreach(line => rows += parseLine(line))
      }
    } finally {
      source.close()
    }
  }

  /** Splits one line into trimmed fields and strips the outer quotes.
    *
    * Splitting on quote-delimiter-quote removes every inner quote pair, which
    * leaves only the opening quote of the first field and the closing quote of
    * the last field to drop.
    */
  private def parseLine(line: String): ArrayBuffer[String] = {
    val fields = ArrayBuffer(line.split(fieldSeparator).map(_.trim): _*)
    // split returns an empty array when the line is nothing but the separator.
    if (fields.nonEmpty) {
      if (fields(0).startsWith("\""))
        fields(0) = fields(0).drop(1)
      val last = fields.length - 1
      if (fields(last).endsWith("\""))
        fields(last) = fields(last).dropRight(1)
    }
    fields
  }

  /** Returns data row `i` (zero-based, header excluded).
    *
    * The returned buffer is the internal one, not a copy.
    * Fails an assertion when `i` is outside `[0, rowCount)`.
    */
  def row(i: Int): ArrayBuffer[String] = {
    assert(i >= 0 && i < rowCount, s"row index $i out of range [0, $rowCount)")
    rows(i)
  }

  /** Copies the first `row_num` rows and first `col_num` columns into a new 2-D array.
    *
    * @param row_num number of leading rows to copy; must not exceed `rowCount`
    * @param col_num number of leading cells to copy from each row; must not
    *                exceed the length of any copied row
    * @return a fresh `row_num` x `col_num` array of cell values
    */
  def returnArray(row_num: Int, col_num: Int): Array[Array[String]] =
    Array.tabulate(row_num) { r =>
      val cells = row(r)
      Array.tabulate(col_num)(c => cells(c))
    }

  /** Number of data rows (the header is not counted). */
  def rowCount: Int = rows.length

  /** Column names parsed from the header line (the internal buffer, not a copy). */
  def columnNames: ArrayBuffer[String] = columns

  /** Number of columns in the header. */
  def columnCount: Int = columns.length

  /** Indices of the rows that have at least one cell equal to `s`.
    *
    * Each matching row is reported once, in ascending order, even when `s`
    * occurs in several of its cells.
    */
  def rowsContaining(s: String): Array[Int] =
    rows.indices.filter(r => rows(r).contains(s)).toArray

  /** Measures how similar stored row `i` is to `row`.
    *
    * Cells are compared position by position up to the shorter of the two rows.
    *
    * @param i   zero-based index of the stored row to compare against
    * @param row the row to compare with
    * @return the fraction of `row`'s cells that match, between 0.0 and 1.0;
    *         0.0 when `row` is empty
    */
  def equal(i: Int, row: ArrayBuffer[String]): Double = {
    // Look the stored row up first so an invalid index still fails for an empty `row`.
    val stored = rows(i)
    if (row.isEmpty) 0.0
    else {
      val matches = row.iterator.zip(stored.iterator).count { case (given, kept) => given == kept }
      // Convert before dividing: Int / Int would truncate every partial match to 0.
      matches.toDouble / row.length
    }
  }
}