Skip to content
Snippets Groups Projects
Commit 84ebd1ec authored by Benoit Favre's avatar Benoit Favre
Browse files

initial commit

parents
No related branches found
No related tags found
No related merge requests found
Copyright 2020 Benoit Favre
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Demo site
=========
Simple website for searching a set of articles labelled with topics.
Data
----
Data is assumed to be a list of articles in JSON format, each with a field 'topic-pred' containing a list of topics.
Example:
```
{
'title': 'Title of the article',
'abstract': 'Abstract of the article',
'authors': 'List of authors as a string',
'publication_date': '2020-05-28',
'url': 'https://url-to-full-text',
'topic-pred': [ 'topic1', 'topic2', ...]
}
```
The included search engine is a very simple search engine implementing BM25 ranking after stopword removal.
Running
-------
```
npm install
node create-db.js papers-with-topics.json
npm start
```
const fs = require('fs');
/**
 * Build the on-disk database used by the web server.
 *
 * Reads a JSON array of articles from `filename`, sorts it by publication
 * date (most recent first), assigns each article an `_id` equal to its
 * position in the sorted array, builds a topic -> [article ids] index and
 * writes both to 'data.json'. Then builds the BM25 full-text index over
 * "title abstract" and saves it to 'data.bm25'.
 *
 * @param {string} filename - path to the JSON file containing the articles.
 */
var load = function(filename) {
  console.log('loading json...');
  const articles = JSON.parse(fs.readFileSync(filename));
  console.log(articles.length, 'articles');
  console.log('saving in db...');

  // Most recent first. A missing date compares as '' and therefore sorts
  // last instead of throwing on `undefined.localeCompare`.
  const byDateDesc = (a, b) =>
    -String(a.publication_date || '').localeCompare(String(b.publication_date || ''));
  articles.sort(byDateDesc);

  // Null-prototype object: a topic named e.g. 'constructor' must not be
  // mistaken for an already-present key by the `in` test below.
  const index = Object.create(null);
  articles.forEach((article, id) => {
    article._id = id;
    // Articles without predicted topics simply do not appear in any topic list.
    for (const topic of article['topic-pred'] || []) {
      if (!(topic in index)) index[topic] = [];
      index[topic].push(id);
    }
  });
  for (const topic in index) {
    index[topic].sort((a, b) => byDateDesc(articles[a], articles[b]));
  }

  const data = {
    index: index,
    articles: articles,
  };
  fs.writeFileSync('data.json', JSON.stringify(data));

  console.log('indexing...');
  const search_engine = require('./search');
  // Missing title/abstract contribute no text rather than the word "undefined"/"null".
  search_engine.build_index(articles.map((doc) => (doc.title || '') + ' ' + (doc.abstract || '')));
  search_engine.save_index('data.bm25');
}
// Command-line entry point: the first user argument is the path to the JSON
// article dump; without it, only the usage line is printed.
if (process.argv.length >= 3) {
  load(process.argv[2]);
} else {
  console.log('usage: node create-db.js <json-data-path>');
}
model.js 0 → 100644
"use strict"
const fs = require('fs');
// Load the whole database written by create-db.js into memory at startup:
// `articles` is the array of article records (an article's _id is its position
// in this array) and `index` maps a topic name to an array of article ids.
const {index, articles} = JSON.parse(fs.readFileSync('data.json').toString());
exports.read = (id, callback) => {
callback(null, articles[id]);
};
exports.for_topic = (topic, page, callback) => {
const num_per_page = 36;
topic = topic || 'All';
page = parseInt(page || 1);
var query = {'topic-pred': topic};
var results, num_found;
if(topic === 'All') {
results = articles.slice((page - 1) * num_per_page, page * num_per_page);
num_found = articles.length;
} else if(topic in index) {
results = index[topic].slice((page - 1) * num_per_page, page * num_per_page).map((id) => articles[id]);
num_found = index[topic].length;
} else {
results = [];
num_found = 0;
}
callback(null, {
results: results,
num_found: num_found,
next_page: page + 1,
topic: topic,
page: page,
num_pages: parseInt(num_found / num_per_page) + 1,
});
}
// BM25 full-text search engine; its index is loaded once at startup from
// 'data.bm25', the file written by create-db.js.
const search_engine = require('./search');
search_engine.load_index('data.bm25');
exports.search = (query, page, callback) => {
const num_per_page = 32;
query = query || "";
page = parseInt(page || 1);
const found = search_engine.search(query);
const num_found = found.length;
const results = found.slice((page - 1) * num_per_page, page * num_per_page).map(result => articles[result[0]]);
callback(null, {
results: results,
num_found: num_found,
query: query,
next_page: page + 1,
page: page,
num_pages: parseInt(num_found / num_per_page) + 1,
});
};
// Topic names offered in the navigation menu. 'All' is not a real topic:
// for_topic treats it as "every article".
exports.topics = ['All', 'Prevention', 'Diagnosis', 'Treatment', 'Case Report', 'Mechanism', 'Transmission', 'Epidemic Forecasting', 'General Info'];
exports.elidate = (articles) => {
articles = JSON.parse(JSON.stringify(articles)); // deep copy
for(var i = 0; i < articles.length; i++) {
var article = articles[i];
var authors = article.authors.split(';');
if(authors.length > 3) {
article['authors'] = authors.slice(0, 3).join(';') + ' et al.';
}
if(article.abstract.length > 200) article.abstract = article.abstract.substr(0, 200) + '...';
}
return articles;
}
{
"name": "paper-server",
"version": "1.0.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"accepts": {
"version": "1.3.7",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz",
"integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==",
"requires": {
"mime-types": "~2.1.24",
"negotiator": "0.6.2"
}
},
"array-flatten": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
"integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI="
},
"async": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/async/-/async-3.1.1.tgz",
"integrity": "sha512-X5Dj8hK1pJNC2Wzo2Rcp9FBVdJMGRR/S7V+lH46s8GVFhtbo5O4Le5GECCF/8PISVdkUA6mMPvgz7qTTD1rf1g=="
},
"body-parser": {
"version": "1.19.0",
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz",
"integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==",
"requires": {
"bytes": "3.1.0",
"content-type": "~1.0.4",
"debug": "2.6.9",
"depd": "~1.1.2",
"http-errors": "1.7.2",
"iconv-lite": "0.4.24",
"on-finished": "~2.3.0",
"qs": "6.7.0",
"raw-body": "2.4.0",
"type-is": "~1.6.17"
}
},
"bytes": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz",
"integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg=="
},
"content-disposition": {
"version": "0.5.3",
"resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz",
"integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==",
"requires": {
"safe-buffer": "5.1.2"
}
},
"content-type": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz",
"integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA=="
},
"cookie": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz",
"integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg=="
},
"cookie-signature": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
"integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw="
},
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"requires": {
"ms": "2.0.0"
}
},
"depd": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz",
"integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak="
},
"destroy": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz",
"integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA="
},
"ee-first": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
"integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0="
},
"encodeurl": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
"integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k="
},
"escape-html": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
"integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg="
},
"etag": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
"integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc="
},
"express": {
"version": "4.17.1",
"resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz",
"integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==",
"requires": {
"accepts": "~1.3.7",
"array-flatten": "1.1.1",
"body-parser": "1.19.0",
"content-disposition": "0.5.3",
"content-type": "~1.0.4",
"cookie": "0.4.0",
"cookie-signature": "1.0.6",
"debug": "2.6.9",
"depd": "~1.1.2",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"etag": "~1.8.1",
"finalhandler": "~1.1.2",
"fresh": "0.5.2",
"merge-descriptors": "1.0.1",
"methods": "~1.1.2",
"on-finished": "~2.3.0",
"parseurl": "~1.3.3",
"path-to-regexp": "0.1.7",
"proxy-addr": "~2.0.5",
"qs": "6.7.0",
"range-parser": "~1.2.1",
"safe-buffer": "5.1.2",
"send": "0.17.1",
"serve-static": "1.14.1",
"setprototypeof": "1.1.1",
"statuses": "~1.5.0",
"type-is": "~1.6.18",
"utils-merge": "1.0.1",
"vary": "~1.1.2"
}
},
"finalhandler": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz",
"integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==",
"requires": {
"debug": "2.6.9",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"on-finished": "~2.3.0",
"parseurl": "~1.3.3",
"statuses": "~1.5.0",
"unpipe": "~1.0.0"
}
},
"forwarded": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz",
"integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ="
},
"fresh": {
"version": "0.5.2",
"resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
"integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac="
},
"http-errors": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz",
"integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==",
"requires": {
"depd": "~1.1.2",
"inherits": "2.0.3",
"setprototypeof": "1.1.1",
"statuses": ">= 1.5.0 < 2",
"toidentifier": "1.0.0"
},
"dependencies": {
"inherits": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz",
"integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4="
}
}
},
"iconv-lite": {
"version": "0.4.24",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
"integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
"requires": {
"safer-buffer": ">= 2.1.2 < 3"
}
},
"ipaddr.js": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
"integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="
},
"lru-cache": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
"integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==",
"requires": {
"yallist": "^3.0.2"
}
},
"media-typer": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
"integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g="
},
"merge-descriptors": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
"integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E="
},
"methods": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
"integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4="
},
"mime": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
"integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="
},
"mime-db": {
"version": "1.43.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.43.0.tgz",
"integrity": "sha512-+5dsGEEovYbT8UY9yD7eE4XTc4UwJ1jBYlgaQQF38ENsKR3wj/8q8RFZrF9WIZpB2V1ArTVFUva8sAul1NzRzQ=="
},
"mime-types": {
"version": "2.1.26",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.26.tgz",
"integrity": "sha512-01paPWYgLrkqAyrlDorC1uDwl2p3qZT7yl806vW7DvDoxwXi46jsjFbg+WdwotBIk6/MbEhO/dh5aZ5sNj/dWQ==",
"requires": {
"mime-db": "1.43.0"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
},
"mustache": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/mustache/-/mustache-3.2.1.tgz",
"integrity": "sha512-RERvMFdLpaFfSRIEe632yDm5nsd0SDKn8hGmcUwswnyiE5mtdZLDybtHAz6hjJhawokF0hXvGLtx9mrQfm6FkA=="
},
"mustache-express": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/mustache-express/-/mustache-express-1.3.0.tgz",
"integrity": "sha512-JWG8Rzxh9tpoLEH0NZ2u/caDiwhIkW+50IOBrcO+lHya3tCYj41bYPDEHCxPbKXvPrSyMNpI6ly4xdU2zpNQtg==",
"requires": {
"async": "~3.1.0",
"lru-cache": "~5.1.1",
"mustache": "^3.1.0"
}
},
"negotiator": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz",
"integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw=="
},
"on-finished": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz",
"integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=",
"requires": {
"ee-first": "1.1.1"
}
},
"parseurl": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
"integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="
},
"path-to-regexp": {
"version": "0.1.7",
"resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz",
"integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w="
},
"proxy-addr": {
"version": "2.0.6",
"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.6.tgz",
"integrity": "sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==",
"requires": {
"forwarded": "~0.1.2",
"ipaddr.js": "1.9.1"
}
},
"qs": {
"version": "6.7.0",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz",
"integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ=="
},
"range-parser": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
"integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg=="
},
"raw-body": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz",
"integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==",
"requires": {
"bytes": "3.1.0",
"http-errors": "1.7.2",
"iconv-lite": "0.4.24",
"unpipe": "1.0.0"
}
},
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
},
"safer-buffer": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
},
"send": {
"version": "0.17.1",
"resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz",
"integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==",
"requires": {
"debug": "2.6.9",
"depd": "~1.1.2",
"destroy": "~1.0.4",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"etag": "~1.8.1",
"fresh": "0.5.2",
"http-errors": "~1.7.2",
"mime": "1.6.0",
"ms": "2.1.1",
"on-finished": "~2.3.0",
"range-parser": "~1.2.1",
"statuses": "~1.5.0"
},
"dependencies": {
"ms": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
"integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg=="
}
}
},
"serve-static": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz",
"integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==",
"requires": {
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"parseurl": "~1.3.3",
"send": "0.17.1"
}
},
"setprototypeof": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz",
"integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw=="
},
"statuses": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz",
"integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow="
},
"toidentifier": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz",
"integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw=="
},
"type-is": {
"version": "1.6.18",
"resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
"integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
"requires": {
"media-typer": "0.3.0",
"mime-types": "~2.1.24"
}
},
"unpipe": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
"integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw="
},
"utils-merge": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
"integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM="
},
"vary": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
"integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw="
},
"yallist": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz",
"integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g=="
}
}
}
{
"name": "paper-server",
"version": "1.0.0",
"description": "",
"main": "server.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "MIT",
"dependencies": {
"body-parser": "^1.19.0",
"express": "^4.17.1",
"mustache-express": "^1.3.0"
}
}
search.js 0 → 100644
"use strict"
var fs = require('fs');
// Set of stopwords read from stopwords.txt (one per line, surrounding whitespace trimmed).
const stopwords = new Set(
  fs.readFileSync('stopwords.txt').toString().split('\n').map((line) => line.trim())
);
// In-memory search index shared by every function in this module.
// load_index replaces this object with the one read from disk.
var index = {
// word -> flat array of postings [docId, count, docId, count, ...]
termFrequency: new Map(),
// documentLength[docId] = number of indexed (non-stopword) tokens in that document
documentLength: [],
// mean of documentLength, computed by build_index
averageDocumentLength: 0,
// k1 and b are the two constants used by the scoring formula in okapi_bm25
k1: 1.2,
b: 0.75,
}
/**
 * Split text into index/query terms.
 *
 * The text is lowercased, every character outside [a-z0-9-] is replaced by a
 * space, and the result is split on whitespace. Stopwords are dropped, and so
 * are the empty strings that split() yields for empty input or for text that
 * starts or ends with punctuation.
 *
 * @param {string} text
 * @returns {string[]} terms in order of appearance (duplicates kept).
 */
function tokenize(text) {
  const tokens = [];
  const words = text.toLowerCase().replace(/[^a-z0-9-]/g, ' ').split(/\s+/);
  for (const word of words) {
    if (word !== '' && !stopwords.has(word)) {
      tokens.push(word);
    }
  }
  return tokens;
}
/**
 * Turn text into a bag of words.
 *
 * @param {string} text
 * @returns {Map<string, number>} term -> number of occurrences, in order of
 *   first appearance.
 */
function vectorize(text) {
  const counts = new Map();
  tokenize(text).forEach((token) => {
    const seen = counts.has(token) ? counts.get(token) : 0;
    counts.set(token, seen + 1);
  });
  return counts;
}
/**
 * Add one document's terms to the postings lists of the shared index.
 *
 * For every distinct term, the pair (id, count) is appended to that term's
 * flat postings array.
 *
 * @param {string} text - document text.
 * @param {number} id - document id to record in the postings.
 * @returns {number} the document length, i.e. the sum of all term counts.
 */
function add_to_index(text, id) {
  const postings = index.termFrequency;
  let total = 0;
  for (const [word, count] of vectorize(text)) {
    if (!postings.has(word)) {
      postings.set(word, []);
    }
    postings.get(word).push(id, count);
    total += count;
  }
  return total;
}
/**
 * Index a collection of documents.
 *
 * Documents receive consecutive ids continuing after those already in the
 * index, so on a fresh index the i-th entry gets id i. A later call appends
 * further documents rather than reusing ids from 0.
 * averageDocumentLength is recomputed over all indexed documents and is 0
 * (not NaN) when the index is empty.
 *
 * @param {Iterable<string>} entries - document texts.
 */
function build_index(entries) {
  // Recover the running total from the stored mean so repeated calls stay correct.
  let totalLength = index.averageDocumentLength * index.documentLength.length;
  for (const text of entries) {
    const id = index.documentLength.length;
    const length = add_to_index(text, id);
    index.documentLength.push(length);
    totalLength += length;
  }
  const documentCount = index.documentLength.length;
  index.averageDocumentLength = documentCount > 0 ? totalLength / documentCount : 0;
}
/**
 * Replace the shared index with one previously written by save_index.
 *
 * @param {string} filename - path of the serialized index.
 */
function load_index(filename) {
  const serialized = fs.readFileSync(filename).toString();
  index = JSON.parse(serialized);
  // On disk the postings are stored as an array of [word, postings] pairs.
  const pairs = index.termFrequency;
  index.termFrequency = new Map(pairs);
}
/**
 * Write the shared index to disk as JSON.
 *
 * A Map is not JSON-serializable, so the postings are written as an array of
 * [word, postings] pairs. The live index is never modified: a shallow copy is
 * serialized, so a failed write cannot leave `index.termFrequency` as an array.
 *
 * @param {string} filename - destination path.
 */
function save_index(filename) {
  // Copying in this order keeps termFrequency as the first key, so the output
  // is byte-identical to serializing the index itself.
  const serializable = Object.assign({}, index, {
    termFrequency: Array.from(index.termFrequency.entries()),
  });
  fs.writeFileSync(filename, JSON.stringify(serializable));
}
/**
 * Inverse document frequency of a term: log((N - df + 0.5) / (df + 0.5)),
 * where N is the number of indexed documents and df the number of documents
 * containing the term. The value is negative for terms present in more than
 * half of the documents.
 *
 * @param {string} word
 * @returns {number} the idf, or 0 when the term is not in the index.
 */
function inverseDocumentFrequency(word) {
  if (!index.termFrequency.has(word)) {
    return 0;
  }
  const documentCount = index.documentLength.length;
  // Postings are flat (docId, count) pairs, hence two array slots per document.
  const documentFrequency = index.termFrequency.get(word).length / 2;
  return Math.log((documentCount - documentFrequency + 0.5) / (documentFrequency + 0.5));
}
/**
 * Score every document that contains at least one query term with Okapi BM25.
 *
 * For each query term t and document d containing it, the contribution is
 *   idf(t) * tf * (k1 + 1) / (tf + k1 * (1 - b + b * |d| / avgdl))
 * and contributions are summed per document. A term repeated in the query
 * contributes once per repetition.
 *
 * @param {string} query - free-text query.
 * @returns {Map<number, number>} document id -> score.
 */
function okapi_bm25(query) {
  const found = new Map();
  const k1 = index.k1;
  const b = index.b;
  for (const word of tokenize(query)) {
    const entries = index.termFrequency.get(word);
    if (entries === undefined) continue; // term not in the index
    const idf = inverseDocumentFrequency(word);
    // Postings are flat (docId, count) pairs.
    for (let i = 0; i < entries.length; i += 2) {
      const doc = entries[i];
      const termFrequency = entries[i + 1];
      const lengthNorm = 1 - b + b * index.documentLength[doc] / index.averageDocumentLength;
      // The whole "tf + k1 * lengthNorm" is the denominator. Without these
      // parentheses the length normalisation is added to the score instead of
      // dividing it, which rewards long documents.
      const score = idf * (termFrequency * (k1 + 1)) / (termFrequency + k1 * lengthNorm);
      if (!found.has(doc)) found.set(doc, score);
      else found.set(doc, score + found.get(doc));
    }
  }
  return found;
}
/**
 * Rank documents for a query.
 *
 * @param {string} [query] - free-text query; a falsy value is treated as "".
 * @returns {Array<[number, number]>} [document id, score] pairs, highest
 *   score first.
 */
function search(query) {
  const scores = okapi_bm25(query || "");
  const ranked = Array.from(scores.entries());
  ranked.sort((left, right) => right[1] - left[1]);
  return ranked;
}
//var docs = JSON.parse(fs.readFileSync(process.argv[2]).toString());
//build_index(docs.map(doc => doc.title + ' ' + doc.abstract));
//save_index('index.bm25');
//load_index('index.bm25');
//console.log(search('virus'));
// Public API of the search module.
module.exports = { build_index, save_index, load_index, search };
"use strict"
var express = require('express');
var mustache = require('mustache-express');
var model = require('./model');
// Express application; views are '.html' files in ./views rendered with Mustache.
var app = express();
app.engine('html', mustache());
app.set('view engine', 'html');
app.set('views', './views');
// Give every response a default topic menu for the navigation bar, with no
// entry marked active; the topic route overrides it to highlight the current topic.
app.use(function(req, res, next) {
  const menu = [];
  for (const topic of model.topics) {
    menu.push({name: topic});
  }
  res.locals.topic_menu = menu;
  return next();
});
// The home page is the listing of all articles.
app.get('/', function(req, res) {
  const home = '/topic/All';
  res.redirect(home);
});
// Article detail page. Ids are positions in the article array, so anything
// that is not a plain non-negative integer (e.g. 'length') or that matches no
// article gets a 404 instead of an empty or garbage page.
app.get('/article/:id', (req, res) => {
  if (!/^\d+$/.test(req.params.id)) {
    res.status(404).send('Article not found');
    return;
  }
  model.read(req.params.id, (err, doc) => {
    if(err) {
      res.status(500).send(err);
    } else if(!doc) {
      res.status(404).send('Article not found');
    } else {
      res.render('read', doc);
    }
  });
});
// Paginated listing of the articles of one topic (?page=N selects the page).
app.get('/topic/:topic', (req, res) => {
  // Same menu as the default one, with the requested topic highlighted.
  res.locals.topic_menu = model.topics.map((topic) => ({name: topic, active: topic === req.params.topic}));
  model.for_topic(req.params.topic, req.query.page, (err, found) => {
    // Report model errors the same way as the article route instead of
    // dereferencing an undefined result.
    if(err) {
      res.status(500).send(err);
      return;
    }
    found.results = model.elidate(found.results);
    res.render('search', found);
  });
});
// Full-text search results (?query=... and optional ?page=N).
app.get('/search', (req, res) => {
  model.search(req.query.query, req.query.page, (err, found) => {
    // Report model errors the same way as the article route instead of
    // dereferencing an undefined result.
    if(err) {
      res.status(500).send(err);
      return;
    }
    found.results = model.elidate(found.results);
    res.render('search', found);
  });
});
// Requests not handled by a route above fall through to static files under old/public.
app.use(express.static('old/public'))
// Start the HTTP server on port 8892.
app.listen(8892, () => console.log('listening on http://localhost:8892'));
a
about
above
across
after
afterwards
again
against
al
all
almost
alone
along
already
also
although
always
am
among
amongst
an
analyze
and
another
any
anyhow
anyone
anything
anywhere
applicable
apply
are
around
as
assume
at
be
became
because
become
becomes
becoming
been
before
beforehand
being
below
beside
besides
between
beyond
both
but
by
came
cannot
cc
cm
come
compare
could
de
dealing
department
depend
did
discover
dl
do
does
during
each
ec
ed
effected
eg
either
else
elsewhere
enough
et
etc
ever
every
everyone
everything
everywhere
except
find
for
found
from
further
get
give
go
gov
had
has
have
he
hence
her
here
hereafter
hereby
herein
hereupon
hers
herself
him
himself
his
how
however
hr
ie
if
ii
iii
in
inc
incl
indeed
into
investigate
is
it
its
itself
j
jour
journal
just
kg
last
latter
latterly
lb
ld
letter
like
ltd
made
make
many
may
me
meanwhile
mg
might
ml
mm
mo
more
moreover
most
mostly
mr
much
must
my
myself
namely
neither
never
nevertheless
next
no
nobody
noone
nor
not
nothing
now
nowhere
of
off
often
on
only
onto
or
other
others
otherwise
our
ours
ourselves
out
over
own
oz
per
perhaps
pm
precede
presently
previously
pt
rather
regarding
relate
said
same
seem
seemed
seeming
seems
seriously
several
she
should
show
showed
shown
since
so
some
somehow
someone
something
sometime
sometimes
somewhere
still
studied
sub
such
take
tell
th
than
that
the
their
them
themselves
then
thence
there
thereafter
thereby
therefore
therein
thereupon
these
they
this
thorough
those
though
through
throughout
thru
thus
to
together
too
toward
towards
try
type
ug
under
unless
until
up
upon
us
used
using
various
very
via
was
we
were
what
whatever
when
whence
whenever
where
whereafter
whereas
whereby
wherein
whereupon
wherever
whether
which
while
whither
who
whoever
whom
whose
why
will
with
within
without
wk
would
wt
yet
you
your
yours
yourself
yourselves
yr
{{! Footer partial: closes the elements opened by the header partial. }}
</main>
</body>
</html>
<!doctype html>
{{! Header partial: page head, topic navigation bar and search form. Expects
    `topic_menu` (list of {name, active}) and optionally `query` to pre-fill the
    search box. Leaves <body> and <main> open; the footer partial closes them. }}
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>CORD-19 topics</title>
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
</head>
<body>
<nav class="navbar navbar-expand-md navbar-dark bg-dark">
<div class="collapse navbar-collapse" id="navbarsExampleDefault">
<a class="navbar-brand" href="/">Topics:</a>
<div class="navbar-nav-scroll ">
<ul class="navbar-nav bd-navbar-nav flex-row">
{{#topic_menu}}
<li class="nav-item">
<a href="/topic/{{name}}" class="nav-link {{#active}}active{{/active}}">{{name}}</a>
</li>
{{/topic_menu}}
</ul>
</div>
<form class="form-inline my-2 my-lg-0 ml-auto" action="/search">
<input class="form-control mr-sm-2" type="text" placeholder="Requête" name="query" aria-label="Search" value="{{query}}">
<input class="btn btn-secondary my-2 my-sm-0" type="submit" value="Rechercher">
</form>
</div>
</nav>
<main role="main" class="container mt-3 mb-5">
{{> header}}
{{! Article detail view: rendered with one article record as context. }}
<div class="row">
<div class="col">
<h1>{{title}}</h1>
<p class="text-muted">{{authors}}</p>
<p>{{publication_date}}</p>
<p>
{{#topic-pred}}
<span class="badge badge-secondary">{{.}}</span>
{{/topic-pred}}
</p>
<p><strong>Abstract :</strong> {{abstract}}</p>
<p>
{{! rel="noopener noreferrer": the URL comes from the data set, so the page
    opened in the new tab must not get a handle on this window. }}
<a class="btn btn-outline-primary" href="{{url}}" target="_blank" rel="noopener noreferrer">Read full-text</a>
</p>
</div>
</div>
{{> footer}}
{{> header}}
{{! Result list view, shared by the topic listing and the full-text search.
    Context: results, num_found, page, num_pages, next_page and either `topic`
    (topic listing) or `query` (search). }}
<div class="row row-cols-1 row-cols-md-2 row-cols-lg-3">
{{#results}}
<div class="col mb-3">
<div class="card mb-4 h-100">
<div class="card-body">
<h5 class="card-title"><a href="/article/{{_id}}">{{title}}</a></h5>
<h6 class="card-subtitle text-muted">{{authors}}</h6>
<p class="card-text">
{{#topic-pred}}
<span class="badge badge-secondary">{{.}}</span>
{{/topic-pred}}
</p>
</div>
<div class="card-footer">
<a class="btn btn-outline-primary" href="{{url}}">Full text</a>
<span class="card-text text-muted float-right">{{publication_date}}</span>
</div>
</div>
</div>
{{/results}}
</div>
{{#next_page}}
<div class="row">
<div class="col text-center">
{{num_found}} articles {{#page}} (page {{page}}/{{num_pages}}) {{/page}}
{{! Search results have no `topic`, so their next page must go back to /search
    with the same query. NOTE(review): the query is HTML-escaped but not
    URL-encoded here; queries containing '&' or '#' will not round-trip. }}
{{#query}}
<a class="btn btn-primary ml-3" href="/search?query={{query}}&amp;page={{next_page}}">Next page</a>
{{/query}}
{{^query}}
<a class="btn btn-primary ml-3" href="/topic/{{topic}}?page={{next_page}}">Next page</a>
{{/query}}
</div>
</div>
{{/next_page}}
{{> footer}}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment