Commit 1b8b8ece authored by Emmanuel Raviart
Browse files

Add support for non-root groups and their code books.

parent ac618ddd
Pipeline #207968 failed with stage
in 2 minutes and 46 seconds
......@@ -82,7 +82,10 @@ npm run prettier
### Indexing DDI files
```bash
npx babel-node --extensions ".ts" -- src/scripts/index_code_books.ts --path=adisp-ddi --title=\"Archives de données issues de la statistique publique \(ADISP\)\" ../public_data/
npx babel-node --extensions ".ts" -- src/scripts/index_code_books.ts --path=cdsp-ddi --title=\"SciencesPo Centre de données socio-politiques \(CDSP\)\" ../public_data/
npx babel-node --extensions ".ts" -- src/scripts/index_code_books.ts --path=ined-ddi --title=\"Institut national d\'études démographiques \(INED\)\" ../public_data/
npx babel-node --extensions ".ts" -- src/scripts/index_code_books.ts --path=adisp --title=\"Archives de données issues de la statistique publique \(ADISP\)\" ../public_data/adisp-ddi/
npx babel-node --extensions ".ts" -- src/scripts/index_code_books.ts --path=cdsp --title=\"SciencesPo Centre de données socio-politiques \(CDSP\)\" ../public_data/cdsp-ddi/
npx babel-node --extensions ".ts" -- src/scripts/index_code_books.ts --path=ined --title=\"Institut national d\'études démographiques \(INED\)\" ../public_data/ined-ddi/
npx babel-node --extensions ".ts" -- src/scripts/index_code_books.ts --path=ined/gpgsurvey --title=\"Enquête Générations et Genre du projet international Generations and Gender Programme \(GGP\)\" ../public_data/ined-gpgsurvey-ddi/
npx babel-node --extensions ".ts" -- src/scripts/index_code_books.ts --path=ukdataservice --title=\"UK Data Service\" ../public_data/ukdataservice-ddi/
npx babel-node --extensions ".ts" -- src/scripts/index_code_books.ts --path=nsddata --title=\"Norwegian Centre for Research Data \(NSD\)\" ../public_data/nsddata-ddi/
```
......@@ -3,11 +3,14 @@ import xmlParser from "fast-xml-parser"
import fs from "fs-extra"
import path from "path"
import config from "./config"
import { db } from "./database"
import { Node, NodeType } from "./data"
import { walkDir } from "./file_systems"
import type { CodeBook } from "./raw_types/code_books"
const publicDataDir = path.resolve(config.publicDataDir)
class CodeBooksIndexer {
existingAutocompletes: Set<string> = new Set()
existingPaths: Set<string> = new Set()
......@@ -97,7 +100,8 @@ class CodeBooksIndexer {
}
async upsertCodeBook(codeBook: CodeBook, filePath: string): Promise<void> {
const path = [this.path, codeBook["@ID"]].join("/")
filePath = path.relative(path.resolve(publicDataDir), filePath)
const nodePath = [this.path, codeBook["@ID"]].join("/")
const title = codeBook.stdyDscr.citation.titlStmt.titl
await db.none(
dedent`
......@@ -121,12 +125,12 @@ class CodeBooksIndexer {
`,
{
filePath,
path,
path: nodePath,
title,
type: NodeType.CodeBook,
},
)
this.existingPaths.delete(path)
this.existingPaths.delete(nodePath)
const autocomplete = title
await db.none(
......@@ -144,10 +148,10 @@ class CodeBooksIndexer {
`,
{
autocomplete,
path,
path: nodePath,
},
)
this.existingAutocompletes.delete(`${path}|${autocomplete}`)
this.existingAutocompletes.delete(`${nodePath}|${autocomplete}`)
}
}
......@@ -157,13 +161,13 @@ export async function indexCodeBooks(
): Promise<void> {
const indexer = new CodeBooksIndexer(rootPath, new Set([NodeType.CodeBook]))
await indexer.start()
const rootSplitPath = rootPath.split("/").filter(Boolean)
if (await fs.pathExists(path.join(dataDir, ...rootSplitPath))) {
for (const splitPath of walkDir(dataDir, rootSplitPath)) {
if (await fs.pathExists(dataDir)) {
for (const splitPath of walkDir(dataDir)) {
if (!splitPath[splitPath.length - 1]?.endsWith(".xml")) {
continue
}
const xml: string = await fs.readFile(path.join(dataDir, ...splitPath), {
const filePath = path.resolve(path.join(dataDir, ...splitPath))
const xml: string = await fs.readFile(filePath, {
encoding: "utf8",
})
const codeBook = xmlParser.parse(
......@@ -171,7 +175,7 @@ export async function indexCodeBooks(
{ ignoreAttributes: false, attributeNamePrefix: "@" },
// true,
).codeBook
await indexer.upsertCodeBook(codeBook, path.join(...splitPath))
await indexer.upsertCodeBook(codeBook, filePath)
}
}
await indexer.stop()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment