Commit c01e074c authored by Emmanuel Raviart
Browse files

Allow specifying the subtree of a dataverse to fetch.

parent 0033523f
......@@ -74,7 +74,7 @@ node --experimental-specifier-resolution=node -- dist/scripts/retrieve_oai-pmh_d
```bash
# data.sciencespo
node --experimental-specifier-resolution=node -- dist/scripts/retrieve_dataverse_ddis.js --url https://data.sciencespo.fr/ --verbose ../public_data/sciencespo-dataverse-ddi/
node --experimental-specifier-resolution=node -- dist/scripts/retrieve_dataverse_ddis.js --tree cdsp --url https://data.sciencespo.fr/ --verbose ../public_data/sciencespo-dataverse-ddi/
```
#### Fetching DDI Files from Nesstar Servers
......
......@@ -24,6 +24,12 @@ const optionsDefinitions = [
name: "silent",
type: Boolean,
},
{
alias: "t",
help: "Subtree of dataverse to parse (dataverse ID)",
name: "tree",
type: String,
},
{
alias: "u",
help: "base URL of Dataverse server",
......@@ -78,6 +84,17 @@ export function auditCommandLineOptions(audit: Audit, data: any): [any, any] {
auditBoolean,
auditSetNullish(false),
)
for (const key of ["tree", "xmlDir"]) {
audit.attribute(
data,
key,
true,
errors,
remainingKeys,
auditTrimString,
auditRequire,
)
}
audit.attribute(
data,
"url",
......@@ -96,15 +113,6 @@ export function auditCommandLineOptions(audit: Audit, data: any): [any, any] {
auditBoolean,
auditSetNullish(false),
)
audit.attribute(
data,
"xmlDir",
true,
errors,
remainingKeys,
auditTrimString,
auditRequire,
)
return audit.reduceRemaining(data, errors, remainingKeys)
}
......@@ -189,12 +197,13 @@ async function* iterDatasets(identifier?: string) {
}
}
async function* iterDataverses() {
console.log("Iterating dataverses")
async function* iterDataverses(identifier?: string) {
console.log(`Iterating dataverses of dataverse "${identifier ?? "root"}"…`)
for (let start = 0; ; ) {
const query = stringifyQuery({
q: "*",
start,
subtree: identifier,
type: "dataverse",
})
const url = new URL(`api/search?${query}`, options.url).toString()
......@@ -218,7 +227,7 @@ async function* iterDataverses() {
async function main(): Promise<void> {
fs.ensureDir(options.xmlDir)
for await (const dataverseSummary of iterDataverses()) {
for await (const dataverseSummary of iterDataverses(options.tree)) {
// console.log(JSON.stringify(dataverseSummary, null, 2))
for await (const datasetSummary of iterDatasets(
dataverseSummary.identifier,
......@@ -226,12 +235,21 @@ async function main(): Promise<void> {
// console.log(JSON.stringify(datasetSummary, null, 2))
// await fetchDatasetMetadataDdi(datasetSummary.global_id)
const dataset = await fetchDataset(datasetSummary.global_id)
for (const fileSummary of dataset.latestVersion.files ?? []) {
// console.log(JSON.stringify(fileSummary, null, 2))
// if (/ddi/g.test(fileSummary.label)) {
if (fileSummary.label.endsWith(".xml")) {
const dataFileSummary = fileSummary.dataFile
await fetchDatafile(dataFileSummary.filename, dataFileSummary.id)
if (dataset !== null) {
let xmlFileFound = false
for (const fileSummary of dataset.latestVersion.files ?? []) {
// console.log(JSON.stringify(fileSummary, null, 2))
// if (/ddi/g.test(fileSummary.label)) {
if (fileSummary.label.endsWith(".xml")) {
const dataFileSummary = fileSummary.dataFile
await fetchDatafile(dataFileSummary.filename, dataFileSummary.id)
xmlFileFound = true
}
}
if (!xmlFileFound) {
console.log(
` No XML file found for dataset ${datasetSummary.global_id}.`,
)
}
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment