Commit 81242fc8 authored by Emmanuel Raviart's avatar Emmanuel Raviart
Browse files

Initial commit

parents
/node_modules/
{
// See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations.
// Extension identifier format: ${publisher}.${name}. Example: vscode.csharp
// List of extensions which should be recommended for users of this workspace.
"recommendations": [
"bradlc.vscode-tailwindcss",
"EditorConfig.EditorConfig",
"esbenp.prettier-vscode",
"svelte.svelte-vscode"
],
// List of extensions recommended by VS Code that should not be recommended for users of this workspace.
"unwantedRecommendations": []
}
{
"editor.formatOnSave": true,
"[css]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[html]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[javascript]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[json]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[markdown]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[svelte]": {
"editor.defaultFormatter": "svelte.svelte-vscode"
}
}
# EZ-DDI
_Fetch, validate, convert & serve CESSDA-compliant DDI repositories._
By:
* [Agnieszka Slusarczyk](mailto:agneska@agneska.fr)
* [Emmanuel Raviart](mailto:emmanuel@raviart.com)
Copyright (C) 2020 Cepremap & Progedo
https://git.nomics.world/progedo/ez-ddi.git
> EZ-DDI is free software; you can redistribute it and/or modify
> it under the terms of the GNU Affero General Public License as
> published by the Free Software Foundation, either version 3 of the
> License, or (at your option) any later version.
>
> EZ-DDI is distributed in the hope that it will be useful,
> but WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> GNU Affero General Public License for more details.
>
> You should have received a copy of the GNU Affero General Public License
> along with this program. If not, see <http://www.gnu.org/licenses/>.
# EZ-DDI
_Fetch, validate, convert & serve CESSDA-compliant DDI repositories._
## Installation
```bash
git clone https://git.nomics.world/progedo/ez-ddi.git
cd ez-ddi/
npm install
```
## Usage
```bash
npx tsc
node src/scripts/retrieve_nesstar_ddis.js --url http://nesstar.progedo-adisp.fr/ ../public_data/adisp-ddi/
```
This diff is collapsed.
{
"name": "ez-ddi",
"version": "0.0.1",
"description": "Fetch, validate, convert & serve CESSDA-compliant DDI repositories",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "https://git.nomics.world/progedo/ez-ddi.git"
},
"author": "DBnomics Team",
"license": "AGPL-3.0-or-later",
"devDependencies": {
"@types/command-line-args": "^5.0.0",
"@types/fs-extra": "^9.0.5",
"@types/node": "^14.14.12",
"@types/node-fetch": "^2.5.7",
"prettier": "^2.2.1",
"typescript": "^4.1.2"
},
"dependencies": {
"command-line-args": "^5.1.1",
"fast-xml-parser": "^3.17.5",
"fs-extra": "^9.0.1",
"node-fetch": "^2.6.1",
"node-stream-zip": "^1.12.0",
"query-string": "^6.13.7"
}
}
// Prettier configuration for this project: no statement-ending semicolons,
// and trailing commas in multi-line constructs.
module.exports = {
semi: false,
trailingComma: "all",
}
import commandLineArgs from "command-line-args"
import xmlParser from "fast-xml-parser"
import fs from "fs-extra"
import fetch from "node-fetch"
import StreamZip from "node-stream-zip"
import path from "path"
import queryString from "query-string"
import stream from "stream"
import util from "util"
// Command-line interface of the script, in the format expected by
// `commandLineArgs`. Each entry describes one option; `help` is a
// human-readable description of the option.
const optionsDefinitions = [
  // -s / --silent
  {
    name: "silent",
    alias: "s",
    type: Boolean,
    help: "don't log anything",
  },
  // -u / --url
  {
    name: "url",
    alias: "u",
    type: String,
    help: "base URL of Nesstar server",
  },
  // -v / --verbose
  {
    name: "verbose",
    alias: "v",
    type: Boolean,
    help: "verbose logs",
  },
  // Positional (default) argument: the output directory.
  {
    name: "xmlDir",
    defaultOption: true,
    type: String,
    help: "directory to store XML DDI Codebook files",
  },
]
// Command-line options parsed according to `optionsDefinitions`
// (read below as options.silent, options.url, options.verbose, options.xmlDir).
const options = commandLineArgs(optionsDefinitions)
// Promise-returning version of `stream.pipeline`, so that it can be awaited.
const pipeline = util.promisify(stream.pipeline)
/**
 * Download the XML document at `url` and convert it to a plain JavaScript
 * object using `xmlParser.parse`.
 *
 * XML attributes are kept and exposed under keys prefixed with "@"
 * (for example `@r:about`). The third argument given to the parser asks it
 * to validate the XML before converting it.
 *
 * As a side effect, the output directory `options.xmlDir` is created when it
 * does not exist yet.
 *
 * @param url - Absolute URL of the XML resource to retrieve.
 * @returns The parsed document.
 * @throws When the output directory can not be created, when the server
 *   answers with a non-success HTTP status or when the body can not be
 *   parsed as XML.
 */
async function fetchJsonFromXml(url: string): Promise<any> {
  // Awaited so that a failure is reported here instead of surfacing later
  // as an unhandled promise rejection.
  await fs.ensureDir(options.xmlDir)

  if (options.verbose) {
    // Logged before the request, so that a failing URL is visible in the logs.
    console.log("GET", url)
  }
  const response = await fetch(url)
  if (!response.ok) {
    // Don't hand an HTTP error page to the XML parser: fail with the real cause.
    throw new Error(
      `Fetch failed at ${url}: ${response.status} ${response.statusText}`,
    )
  }
  const xmlText = await response.text()

  let data
  try {
    data = xmlParser.parse(
      xmlText,
      { ignoreAttributes: false, attributeNamePrefix: "@" },
      true,
    )
  } catch (error) {
    if (!options.silent) {
      const reason = error instanceof Error ? error.message : String(error)
      console.warn(`File at ${url} can not be parsed: ${reason}`)
    }
    throw error
  }

  if (options.verbose) {
    console.log(JSON.stringify(data, null, 2))
  }
  return data
}
/**
 * Extract every entry of the zip archive at `zipFilePath` into `targetDir`.
 *
 * @param zipFilePath - Path of an existing zip file.
 * @param targetDir - Existing directory receiving the extracted entries.
 * @throws (rejects) When the archive can not be opened or extracted.
 */
function extractZip(zipFilePath: string, targetDir: string): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const zip = new StreamZip({
      file: zipFilePath,
      storeEntries: true,
    })
    // Without this listener an unreadable archive would never emit "ready"
    // and the promise would stay pending forever.
    zip.on("error", reject)
    zip.on("ready", () => {
      // Patch following line, because of wrong TypeScript definition for zip.extract.
      // zip.extract(null, targetDir, (err: any, _count: number) => {
      zip.extract(null!, targetDir, (err: any) => {
        zip.close()
        if (err) {
          reject(err)
        } else {
          resolve()
        }
      })
    })
  })
}

/**
 * Walk the Nesstar server located at `options.url` (server home → server →
 * catalogs → catalog → datasets) and, for each study listed in the datasets,
 * download its zipped DDI export, extract it into `options.xmlDir` and delete
 * the downloaded zip file.
 *
 * Progress is logged to the console unless `options.silent` is set.
 *
 * @throws When a required command-line option is missing, when a request
 *   fails or when an archive can not be written or extracted.
 */
async function main(): Promise<void> {
  if (!options.url) {
    throw new Error("Missing base URL of Nesstar server (option --url)")
  }
  if (!options.xmlDir) {
    throw new Error("Missing directory to store XML DDI Codebook files")
  }
  // The zip files are written to this directory, so it must exist.
  await fs.ensureDir(options.xmlDir)

  const query = queryString.stringify({
    "http://www.nesstar.org/rdf/method":
      "http://www.nesstar.org/rdf/common/ServerHome/findAll",
  })
  const serverHomeUrl = new URL(
    `obj/cServerHome/ServerHome?${query}`,
    options.url,
  ).toString()
  const serverHomeData = await fetchJsonFromXml(serverHomeUrl)

  const serverUrl = serverHomeData["r:RDF"]["p2:Server"]["@r:about"]
  const serverData = await fetchJsonFromXml(serverUrl)

  const catalogsUrl =
    serverData["r:RDF"]["p2:Server"]["n19:catalogs"]["@r:resource"]
  const catalogsData = await fetchJsonFromXml(catalogsUrl)
  const catalogSummary = catalogsData["r:RDF"]["p4:Catalog"]
  if (!options.silent) {
    console.log("Catalog:", catalogSummary["s:label"])
  }

  const catalogUrl = catalogSummary["@r:about"]
  const catalogData = await fetchJsonFromXml(catalogUrl)
  const catalog = catalogData["r:RDF"]["p4:Catalog"]

  const datasetsUrl = catalog["n24:datasets"]["@r:resource"]
  const datasetsData = await fetchJsonFromXml(datasetsUrl)

  // The XML parser yields an array only when an element is repeated: a single
  // study comes back as a plain object and no study as undefined.
  const rawStudies = datasetsData["r:RDF"]["p4:Study5"]
  const studiesSummary: any[] =
    rawStudies == null
      ? []
      : Array.isArray(rawStudies)
      ? rawStudies
      : [rawStudies]

  for (const studySummary of studiesSummary) {
    const studyUrl = studySummary["@r:about"]
    if (!options.silent) {
      console.log(
        "Study:",
        studySummary["n:creationDate"],
        studySummary["s:label"],
        studyUrl,
      )
    }

    // The last segment of the study URL is used as the zip file name.
    const studyUrlSegments = studyUrl.split("/")
    const studyName = studyUrlSegments[studyUrlSegments.length - 1]

    const ddiQuery = queryString.stringify({
      // gs: "",
      gzip: false,
      format: "xml",
      // language: "",
      mode: "transform",
      // s: "",
      study: studyUrl,
    })
    const ddiUrl = new URL(
      `webview/velocity/xml.zip?${ddiQuery}`,
      options.url,
    ).toString()

    const response = await fetch(ddiUrl)
    if (!response.ok) {
      console.error(response.status, response.statusText)
      console.error(await response.text())
      throw new Error(`Fetch failed at ${ddiUrl}`)
    }

    const ddiZipFilePath = path.join(options.xmlDir, studyName + ".zip")
    try {
      await pipeline(response.body, fs.createWriteStream(ddiZipFilePath))
      const ddiZipFileStats = await fs.stat(ddiZipFilePath)
      // An empty download is not a zip archive: there is nothing to extract.
      if (ddiZipFileStats.size > 0) {
        await extractZip(ddiZipFilePath, options.xmlDir)
      }
    } finally {
      // The zip file is only a temporary artifact: remove it even on failure.
      await fs.remove(ddiZipFilePath)
    }
  }
}
// Script entry point: run main() and turn its outcome into the process exit
// code (0 on success; on failure the error is printed and the code is 1).
main().then(
  () => process.exit(0),
  (error) => {
    console.error(error)
    process.exit(1)
  },
)
{
"compilerOptions": {
/* Visit https://aka.ms/tsconfig.json to read more about this file */
/* Basic Options */
// "incremental": true, /* Enable incremental compilation */
"target": "es5" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */,
"module": "commonjs" /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */,
// "lib": [], /* Specify library files to be included in the compilation. */
// "allowJs": true, /* Allow javascript files to be compiled. */
// "checkJs": true, /* Report errors in .js files. */
// "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */
// "declaration": true, /* Generates corresponding '.d.ts' file. */
// "declarationMap": true, /* Generates a sourcemap for each corresponding '.d.ts' file. */
// "sourceMap": true, /* Generates corresponding '.map' file. */
// "outFile": "./", /* Concatenate and emit output to single file. */
// "outDir": "./", /* Redirect output structure to the directory. */
// "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */
// "composite": true, /* Enable project compilation */
// "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */
// "removeComments": true, /* Do not emit comments to output. */
// "noEmit": true, /* Do not emit outputs. */
// "importHelpers": true, /* Import emit helpers from 'tslib'. */
// "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */
// "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */
/* Strict Type-Checking Options */
"strict": true /* Enable all strict type-checking options. */,
// "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* Enable strict null checks. */
// "strictFunctionTypes": true, /* Enable strict checking of function types. */
// "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */
// "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */
// "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */
// "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */
/* Additional Checks */
// "noUnusedLocals": true, /* Report errors on unused locals. */
// "noUnusedParameters": true, /* Report errors on unused parameters. */
// "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */
// "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */
// "noUncheckedIndexedAccess": true, /* Include 'undefined' in index signature results */
/* Module Resolution Options */
// "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */
// "baseUrl": "./", /* Base directory to resolve non-absolute module names. */
// "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */
// "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */
// "typeRoots": [], /* List of folders to include type definitions from. */
// "types": [], /* Type declaration files to be included in compilation. */
// "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */
"esModuleInterop": true /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */,
// "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
/* Source Map Options */
// "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */
// "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */
/* Experimental Options */
// "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */
// "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */
/* Advanced Options */
"skipLibCheck": true /* Skip type checking of declaration files. */,
"forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment