Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
progedo
Data Catalogue
Commits
c01e074c
Commit
c01e074c
authored
Nov 04, 2021
by
Emmanuel Raviart
Browse files
Allow specifying the subtree of a dataverse to fetch.
parent
0033523f
Changes
2
Hide whitespace changes
Inline
Side-by-side
README.md
View file @
c01e074c
...
...
@@ -74,7 +74,7 @@ node --experimental-specifier-resolution=node -- dist/scripts/retrieve_oai-pmh_d
```bash
# data.sciencespo
node --experimental-specifier-resolution=node -- dist/scripts/retrieve_dataverse_ddis.js --url https://data.sciencespo.fr/ --verbose ../public_data/sciencespo-dataverse-ddi/
node --experimental-specifier-resolution=node -- dist/scripts/retrieve_dataverse_ddis.js --tree cdsp --url https://data.sciencespo.fr/ --verbose ../public_data/sciencespo-dataverse-ddi/
```
#### Fetching DDI Files from Nesstar Servers
...
...
src/scripts/retrieve_dataverse_ddis.ts
View file @
c01e074c
...
...
@@ -24,6 +24,12 @@ const optionsDefinitions = [
name
:
"
silent
"
,
type
:
Boolean
,
},
{
alias
:
"
t
"
,
help
:
"
Subtree of dataverse to parse (dataverse ID)
"
,
name
:
"
tree
"
,
type
:
String
,
},
{
alias
:
"
u
"
,
help
:
"
base URL of Dataverse server
"
,
...
...
@@ -78,6 +84,17 @@ export function auditCommandLineOptions(audit: Audit, data: any): [any, any] {
auditBoolean
,
auditSetNullish
(
false
),
)
for
(
const
key
of
[
"
tree
"
,
"
xmlDir
"
])
{
audit
.
attribute
(
data
,
key
,
true
,
errors
,
remainingKeys
,
auditTrimString
,
auditRequire
,
)
}
audit
.
attribute
(
data
,
"
url
"
,
...
...
@@ -96,15 +113,6 @@ export function auditCommandLineOptions(audit: Audit, data: any): [any, any] {
auditBoolean
,
auditSetNullish
(
false
),
)
audit
.
attribute
(
data
,
"
xmlDir
"
,
true
,
errors
,
remainingKeys
,
auditTrimString
,
auditRequire
,
)
return
audit
.
reduceRemaining
(
data
,
errors
,
remainingKeys
)
}
...
...
@@ -189,12 +197,13 @@ async function* iterDatasets(identifier?: string) {
}
}
async
function
*
iterDataverses
()
{
console
.
log
(
"
Iterating dataverses
…
"
)
async
function
*
iterDataverses
(
identifier
?:
string
)
{
console
.
log
(
`
Iterating dataverses
of dataverse "
${
identifier
??
"
root
"
}
"…`
)
for
(
let
start
=
0
;
;
)
{
const
query
=
stringifyQuery
({
q
:
"
*
"
,
start
,
subtree
:
identifier
,
type
:
"
dataverse
"
,
})
const
url
=
new
URL
(
`api/search?
${
query
}
`
,
options
.
url
).
toString
()
...
...
@@ -218,7 +227,7 @@ async function* iterDataverses() {
async
function
main
():
Promise
<
void
>
{
fs
.
ensureDir
(
options
.
xmlDir
)
for
await
(
const
dataverseSummary
of
iterDataverses
())
{
for
await
(
const
dataverseSummary
of
iterDataverses
(
options
.
tree
))
{
// console.log(JSON.stringify(dataverseSummary, null, 2))
for
await
(
const
datasetSummary
of
iterDatasets
(
dataverseSummary
.
identifier
,
...
...
@@ -226,12 +235,21 @@ async function main(): Promise<void> {
// console.log(JSON.stringify(datasetSummary, null, 2))
// await fetchDatasetMetadataDdi(datasetSummary.global_id)
const
dataset
=
await
fetchDataset
(
datasetSummary
.
global_id
)
for
(
const
fileSummary
of
dataset
.
latestVersion
.
files
??
[])
{
// console.log(JSON.stringify(fileSummary, null, 2))
// if (/ddi/g.test(fileSummary.label)) {
if
(
fileSummary
.
label
.
endsWith
(
"
.xml
"
))
{
const
dataFileSummary
=
fileSummary
.
dataFile
await
fetchDatafile
(
dataFileSummary
.
filename
,
dataFileSummary
.
id
)
if
(
dataset
!==
null
)
{
let
xmlFileFound
=
false
for
(
const
fileSummary
of
dataset
.
latestVersion
.
files
??
[])
{
// console.log(JSON.stringify(fileSummary, null, 2))
// if (/ddi/g.test(fileSummary.label)) {
if
(
fileSummary
.
label
.
endsWith
(
"
.xml
"
))
{
const
dataFileSummary
=
fileSummary
.
dataFile
await
fetchDatafile
(
dataFileSummary
.
filename
,
dataFileSummary
.
id
)
xmlFileFound
=
true
}
}
if
(
!
xmlFileFound
)
{
console
.
log
(
` No XML file found for dataset
${
datasetSummary
.
global_id
}
.`
,
)
}
}
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment