-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathothersolution.js
102 lines (88 loc) · 3.15 KB
/
othersolution.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
'use strict';
const fs = require('fs');
const readline = require('readline');
const { performance: perf } = require('perf_hooks');
/**
 * Finds the largest value of all own enumerable fields in `obj`
 * and returns its field name (key).
 *
 * The running maximum starts at `-Infinity`, so objects whose values are all
 * zero or negative still yield a key. On ties the first key encountered wins.
 *
 * @param {Object<string, number>} obj - mapping from key to numeric value.
 * @returns {?string} key holding the largest value, or `null` when `obj` has
 *   no entries (or no value compares greater than `-Infinity`).
 */
function maxEntry(obj) {
  let maxValue = -Infinity;
  let maxKey = null;
  for (const [key, value] of Object.entries(obj)) {
    // strict `>` keeps the earliest key when several share the maximum
    if (value > maxValue) {
      maxKey = key;
      maxValue = value;
    }
  }
  return maxKey;
}
/**
 * Extracts the first name from a string of the form `"LAST, FIRST [MIDDLE]"`.
 *
 * Adapted from [Paige Niedringhaus](https://github.com/paigen11/file-read-challenge/blob/master/readFileStream.js).
 * The original called `.trim()` without using its result (strings are
 * immutable), so leading whitespace after the comma produced an empty name.
 *
 * @param {string} name - full name; the part after the first `', '` is used.
 * @returns {string|undefined} the first space-delimited word after the comma
 *   (possibly `''` when only whitespace follows), or `undefined` when `name`
 *   contains no `', '` separator.
 */
function getFirstName(name) {
  const afterComma = name.split(', ')[1];
  if (afterComma === undefined) {
    return undefined;
  }
  // trim first so stray spaces around the given names cannot yield '';
  // taking the first word then filters out middle names/initials
  return afterComma.trim().split(' ')[0];
}
function getFirstNameRegEx(name) {
// idea for this taken from [Stuart Marks](https://stuartmarks.wordpress.com/2019/01/11/processing-large-files-in-java/)
// the documentation assures me that a regex in this form is compiled at parse-time
// and is indeed a constant in memory.
const regex = /, (\S+)/;
const match = regex.exec(name);
return match && match[1];
}
// Locale shared by both formatters below.
const numberLocale = 'en-us';
// Rounds to at most four significant digits; used for the timing output.
const numberFormat4 = new Intl.NumberFormat(numberLocale, { maximumSignificantDigits: 4 });
// Default formatting with digit grouping; used for the counts.
const numberFormatFull = new Intl.NumberFormat(numberLocale);
/**
 * Streams the file named by `args[0]` line by line, counts how often each
 * first name occurs, and prints timing and summary statistics once the
 * whole file has been read.
 *
 * Each line is split on `|`; the eighth field (`fields[7]`) is passed to
 * `getFirstNameRegEx`. Lines for which no first name is found are counted
 * under the key `'null'`.
 *
 * @param {string[]} args - program arguments; `args[0]` is the input path.
 */
function main(args) {
  console.log(`Opening file '${args[0]}'.`);

  const input = fs.createReadStream(args[0]);
  // Without a listener, a read failure (e.g. a missing file) surfaces as an
  // unhandled 'error' event and aborts the process with a stack trace.
  input.on('error', err => {
    console.error(`Could not read file '${args[0]}': ${err.message}`);
    process.exitCode = 1;
  });

  const rl = readline.createInterface({
    input,
    crlfDelay: Infinity,
  });

  const timeStartReadLoop = perf.now();
  let timeInsideReadLoop = 0;
  // Prototype-less dictionary: names such as 'constructor' or 'toString'
  // must not collide with properties inherited from Object.prototype.
  const nameCounts = Object.create(null);
  let lineCount = 0;

  rl.on('line', line => {
    const timeStartInside = perf.now();
    lineCount++;
    const fields = line.split('|');
    const name = getFirstNameRegEx(fields[7]); // to count full names, just use `fields[7];`
    nameCounts[name] = (nameCounts[name] || 0) + 1;
    // No logging in here: this handler runs once per input line and its
    // duration is what `timeInsideReadLoop` measures.
    timeInsideReadLoop += perf.now() - timeStartInside;
  });

  rl.on('close', () => {
    const totalTime = numberFormat4.format(perf.now() - timeStartReadLoop);
    const insideTime = numberFormat4.format(timeInsideReadLoop);
    console.log(`Total time for reading and processing file: ${totalTime}ms.`);
    console.log(
      `Thereof time spent in custom processing code: ${insideTime}ms.`,
    );
    console.log(`Dataset has ${numberFormatFull.format(lineCount)} entries.`);

    const numUniqueNames = numberFormatFull.format(
      Object.keys(nameCounts).length,
    );
    console.log(`${numUniqueNames} different names found in file.`);

    const name = maxEntry(nameCounts);
    if (name === null) {
      // Nothing was counted; avoid reporting "'null' with NaN occurrences".
      console.log('No names found in file.');
      return;
    }
    const nameOccurrences = numberFormatFull.format(nameCounts[name]);
    console.log(
      `The most common name is '${name}' with ${nameOccurrences} occurrences.`,
    );
  });

  // Printed before any line is processed: the handlers above run asynchronously.
  console.log(`Getting started...`);
}
// Pass only the user-supplied arguments (everything after the node binary and
// the script path). `slice` is used instead of `splice` so that the global
// `process.argv` is left unmodified.
// Note that in older versions of node, you need to skip
// `process.execArgv.length + 2` entries instead, but v10.15 seems to already
// remove the runtime's arguments from the program's.
main(process.argv.slice(2));