import click
from NekUpload.upload import InvenioRDM
from NekUpload.metadata import *
from NekUpload.metadata.user import InvenioUserInfo,InvenioUserInfoFactory
from typing import Dict,List,Tuple,Any
from dataclasses import dataclass,field
from datetime import date
import json
import os
import pathlib
import yaml
from ruamel.yaml import YAML
from io import StringIO
from NekUpload.utils.xml_reader import XMLReader
from NekUpload.utils.hdf5_reader import HDF5Reader
@dataclass
class Config:
    """State shared between CLI invocations.

    Holds the authors collected so far, the dataset metadata and the path of
    the JSON context file the state is saved to and restored from.
    """

    # Project types are written as string annotations so they are only named,
    # not evaluated, when the class object is created.
    # default_factory gives every instance its own, independent author list.
    authors: "List[InvenioOrgInfo | InvenioPersonInfo]" = field(default_factory=list)
    metadata: "InvenioMetadata | None" = None
    # The default is a real Path so the value always matches its annotation.
    CONTEXT_FILE: pathlib.Path = pathlib.Path("config.json")

    def to_json(self) -> Dict[str, Any]:
        """Return a JSON-serialisable dict describing this config.

        ``metadata`` and ``authors`` are only included when they are set /
        non-empty; ``CONTEXT_FILE`` is always present (as a string).
        """
        data: Dict[str, Any] = {"CONTEXT_FILE": str(self.CONTEXT_FILE)}

        if self.metadata:
            data["metadata"] = self.metadata.to_json_serialisable()

        if self.authors:
            data["authors"] = [entry.to_json_serialisable() for entry in self.authors]

        return data

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'Config':
        """Rebuild a config from the dict layout produced by :meth:`to_json`.

        Args:
            data: Parsed JSON document. Every key is optional; a missing
                ``CONTEXT_FILE`` keeps the class default instead of raising.

        Returns:
            A new instance populated from ``data``.
        """
        config = cls()

        context_file = data.get("CONTEXT_FILE")
        if context_file is not None:
            config.CONTEXT_FILE = pathlib.Path(context_file)

        if metadata := data.get("metadata", None):
            config.metadata = InvenioMetadata.from_json(metadata)

        if authors := data.get("authors", []):
            config.authors = [
                InvenioUserInfoFactory.create_from_json(entry) for entry in authors
            ]

        return config

    @classmethod
    def from_yaml(cls, data: Dict[str, Any]) -> 'Config':
        """Build a config from a parsed user YAML document.

        Args:
            data: Parsed YAML. ``data["metadata"]`` must provide ``title``,
                ``publication_date`` and ``authors``; ``publisher`` defaults
                to ``"NekUpload-CLI"``. The input is not modified.

        Returns:
            A new instance whose ``authors`` and ``metadata`` are set.

        Raises:
            KeyError: If one of the mandatory metadata keys is missing.
        """
        metadata: Dict[str, Any] = data["metadata"]

        title = metadata["title"]
        publication_date = metadata["publication_date"]
        publisher = metadata.get("publisher", "NekUpload-CLI")
        # NOTE: metadata["description"] is accepted in the YAML file but is
        # not forwarded anywhere yet.
        authors: List[Dict[str, Any]] = metadata["authors"]

        def build_author(yaml_author: Dict[str, Any]) -> "InvenioUserInfo":
            """Turn one YAML author entry into an author object."""
            # Work on a shallow copy so the caller's dict stays untouched.
            author_json = dict(yaml_author)
            identifiers = author_json.get("identifiers", [])
            identifier = None

            if identifiers:
                # The YAML identifier mapping is stripped before the entry is
                # handed to the factory and re-attached afterwards.
                author_json.pop("identifiers")

                # right now, only orcid is supported
                orcid = identifiers.get("orcid")
                if orcid:
                    identifier = Identifier(orcid, IdentifierType.ORCID)

            author = InvenioUserInfoFactory.create_from_json(author_json)
            if identifier:
                author.add_identifier(identifier)

            return author

        author_list = [build_author(entry) for entry in authors]

        config = cls()
        config.authors = author_list

        metadata_obj = InvenioMetadata(title, publication_date, author_list)
        metadata_obj.add_publisher(publisher)
        config.metadata = metadata_obj

        return config
@click.group()
@click.option(
    "--config",
    "-c",
    type=click.Path(dir_okay=False, path_type=pathlib.Path),
    default="config.json",
    help="Use specified config file, defaults to config.json",
)
@click.pass_context
def cli(ctx: click.Context, config: pathlib.Path):
    # Root command group. It restores the saved Config from the JSON file
    # `config` (falling back to an empty Config), exposes it to sub-commands as
    # ctx.obj, and registers a close hook that writes it back to the same file.
    # Deliberately no docstring: Click would surface it as help text.
    ctx.ensure_object(Config)

    restored = None
    try:
        with open(config, "r") as handle:
            restored = Config.from_json(json.load(handle))
    except FileNotFoundError:
        # Nothing saved yet; start from an empty config below.
        pass
    except json.JSONDecodeError:
        click.echo(f"Warning: Could not parse {config}. Starting with a fresh config.")

    ctx.obj = Config() if restored is None else restored
    ctx.obj.CONTEXT_FILE = config

    def persist_on_close():
        save_config(ctx, config)

    ctx.call_on_close(persist_on_close)
def save_config(ctx: click.Context,config_file: pathlib.Path):
    """Write the config held in ``ctx.obj`` to ``config_file`` as JSON.

    Args:
        ctx: Click context whose ``obj`` provides ``to_json()``.
        config_file: Destination path; it is overwritten.
    """
    # Serialise first: opening with "w" truncates the file, so a failure in
    # to_json() or in the encoder must happen before the old content is lost.
    payload = json.dumps(ctx.obj.to_json(), indent=4)

    with open(config_file, "w") as f:
        f.write(payload)

    click.echo(f"Config saved to {config_file}")
@cli.command()
@click.argument('given_name')
@click.argument('last_name')
@click.option('--orcid',help="orcid identifier")
@click.option('--affiliation',help="affiliated with what organisation")
@click.pass_context
def add_author_person(ctx: click.Context,given_name: str,last_name: str,orcid: str=None,affiliation: str=None):
    """Add an author, who is a person

    \b
    GIVEN_NAME: Given name of author (use quotes if there is a space)
    LAST_NAME: Last name of author (use quotes if there is a space)
    """
    # The docstring above is the user-facing help text. The "\b" marker has to
    # open its own paragraph (blank line before it) for Click to keep the
    # argument list un-wrapped.
    author = InvenioPersonInfo(given_name,last_name)

    if orcid:
        author.add_identifier(Identifier(orcid,IdentifierType.ORCID))

    if affiliation:
        # The option is accepted but nothing stores it yet; say so rather than
        # dropping the value silently.
        click.echo("Warning: --affiliation is not stored yet and will be ignored.")

    # ctx.obj is the Config installed by the cli group; it is written back to
    # the config file when the context closes.
    config: Config = ctx.obj
    config.authors.append(author)
@cli.command()
@click.argument('name')
@click.option('--orcid',help="orcid identifier")
@click.option('--affiliation',help="affiliated with what organisation")
@click.pass_context
def add_author_org(ctx: click.Context,name: str,orcid: str=None,affiliation: str=None):
    """Add an author, who is an organisation

    \b
    NAME: Name of organisation (use quotes if there is a space)
    """
    # The docstring above is the user-facing help text. It used to document a
    # LAST_NAME argument this command does not take. The "\b" marker has to
    # open its own paragraph for Click to keep the argument list un-wrapped.
    author = InvenioOrgInfo(name)

    if orcid:
        author.add_identifier(Identifier(orcid,IdentifierType.ORCID))

    if affiliation:
        # The option is accepted but nothing stores it yet; say so rather than
        # dropping the value silently.
        click.echo("Warning: --affiliation is not stored yet and will be ignored.")

    # ctx.obj is the Config installed by the cli group; it is written back to
    # the config file when the context closes.
    config: Config = ctx.obj
    config.authors.append(author)
@cli.command()
@click.argument("title")
@click.option("--pub-date",help="Publication date, defaults to today's date")
@click.pass_context
def add_info(ctx: click.Context,title: str,pub_date: str=None):
    """Add information about datasets to be uploaded

    \b
    TITLE: dataset title
    """
    # The docstring above is the user-facing help text; the "\b" marker has to
    # open its own paragraph (blank line before it) for Click to honour it.
    config: Config = ctx.obj

    if not pub_date:
        # No date given: fall back to today's date in ISO form (YYYY-MM-DD).
        pub_date = date.today().isoformat()

    # The metadata is built from the authors registered so far.
    config.metadata = InvenioMetadata(title,pub_date,config.authors)
@cli.command()
@click.option('-u',"--user-config",help="user-defined yaml file containing upload settings")
@click.option("--api-key", envvar="NEKTAR_DB_API_KEY", help="Your API key (or set environment variable NEKTAR_DB_API_KEY)")
@click.option("--host", envvar="NEKTAR_DB_HOST", help="Host name of database (or set environment variable NEKTAR_DB_HOST)")
@click.option("--community-slug", envvar="NEKTAR_DB_COMMUNITY", help="Community id to upload to (or set environment variable NEKTAR_DB_COMMUNITY)")
@click.option("-dir","--directory",type=click.Path(exists=True), help="Directory containing files to be uploaded")
@click.option('-f', '--file', multiple=True, type=click.Path(exists=True), help="Path to a file to be uploaded, can specify multiple e.g. -f file1 -f file2")
@click.pass_context
def upload(ctx: click.Context,api_key: str=None, host: str=None, community_slug: str=None,directory: str=None,file:Tuple[str]=None,user_config:str=None):
    """Validate and upload the files to the specified database
    """
    # With --user-config every setting is taken from that YAML file and all
    # other options are ignored.
    if user_config:
        upload_user_config(user_config)
        return

    # Connection settings are mandatory; report the first one that is missing.
    required_settings = (
        (api_key, "Error: API key is required. Set the NEKTAR_DB_API_KEY environment variable or use the --api-key option."),
        (host, "Error: Host name is required. Set the NEKTAR_DB_HOST environment variable or use the --host option"),
        (community_slug, "Error: Community id is required. Set the NEKTAR_DB_COMMUNITY environment variable or use the --community-slug option"),
    )
    for value, message in required_settings:
        if not value:
            click.echo(message)
            return

    if not directory and not file:
        click.echo("Error: No files detected. Set files either via -f or the directory via -dir")
        # Stop here; without this the upload went ahead with files=None.
        return

    # A directory takes precedence over individually listed files. Only the
    # regular files directly inside it are collected (no recursion).
    if directory:
        candidates = (os.path.join(directory, entry) for entry in os.listdir(directory))
        files = [path for path in candidates if os.path.isfile(path)]
    else:
        files = list(file)

    config: Config = ctx.obj

    if not config.metadata:
        click.echo("Error: No metadata set. Please use the command add_info")
        return

    if not config.authors:
        click.echo("Error: No authors set. Please set an author with command add_author_person or add_author_org")
        return

    db = InvenioRDM()
    # NOTE(review): upload_user_config wraps this payload as
    # {"metadata": ...} before calling upload_files, while the bare payload is
    # passed here -- confirm which shape upload_files expects.
    metadata = config.metadata.get_metadata_payload()
    db.upload_files(host,api_key,files,metadata,community_slug)
def upload_user_config(user_config_file: str):
    """Upload files using only the settings in a user YAML file.

    The file must contain ``metadata`` (see ``Config.from_yaml``), a
    ``database`` section with ``host_name``, ``api_key`` and
    ``community_slug``, and an ``upload`` section listing ``files`` and/or
    ``directories``.

    Args:
        user_config_file: Path to the YAML settings file.

    Raises:
        KeyError: If a mandatory section or key is missing.
    """
    settings: Dict[str,Any] = read_yaml_file(user_config_file)
    if not settings:
        # read_yaml_file returns None when the file is missing or invalid;
        # an empty document cannot be used either.
        click.echo(f"Error: Could not load upload settings from {user_config_file}")
        return

    config = Config.from_yaml(settings)

    database = settings["database"]
    host_name = database["host_name"]
    #unsafe key storage atm
    api_key = database["api_key"]
    community_slug = database["community_slug"]

    upload_section = settings["upload"]
    # Copy the list so the parsed YAML stays untouched; a key that is present
    # but empty is treated as "nothing listed".
    files_to_upload = list(upload_section.get("files") or [])

    # Add the regular files found directly inside each listed directory.
    # extend() keeps the list flat; append() used to nest a list inside it.
    for directory in upload_section.get("directories") or []:
        candidates = (os.path.join(directory, entry) for entry in os.listdir(directory))
        files_to_upload.extend(path for path in candidates if os.path.isfile(path))

    client = InvenioRDM()
    # NOTE(review): the `upload` command passes the bare payload to
    # upload_files, while it is wrapped as {"metadata": ...} here -- confirm
    # which shape upload_files expects.
    metadata = {"metadata": config.metadata.get_metadata_payload()}
    client.upload_files(host_name,api_key,files_to_upload,metadata,community_slug)
@cli.command()
@click.pass_context
def list_authors(ctx: click.Context):
    """List all authors."""
    # ctx.obj is the Config installed by the cli group; each stored author is
    # echoed on its own line.
    for entry in ctx.obj.authors:
        click.echo(entry)
def read_yaml_file(filepath):
    """Load a YAML document from disk.

    Args:
        filepath: Location of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file (typically a dict
        or list). ``None`` is returned, after printing an error message, when
        the file does not exist or its content is not valid YAML.
    """
    try:
        with open(filepath, 'r') as stream:
            # safe_load avoids the security issues of the unrestricted loader
            return yaml.safe_load(stream)
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
    except yaml.YAMLError as exc:
        print(f"Error parsing YAML: {exc}")

    # Only reached after one of the failures reported above.
    return None
@cli.command()
@click.option("--name", "-n", help="Create a yaml configuration template for user with specified name", default="config.yaml")
def quickstart(name: str):
    """Creates a template configuration file for nekupload"""
    # Example authors: one person (with an ORCID placeholder) and one organisation.
    person_author = {
        'type': 'personal',
        'given_name': '<NAME>',
        'family_name': '<SURNAME>',
        'identifiers': {'orcid': 'xxxx-xxxx-xxxx-xxxx'},
    }
    org_author = {
        'type': 'organizational',
        'name': 'Imperial College London',
    }

    template = {
        'metadata': {
            'title': '<TITLE HERE>',
            'publication_date': date.today().isoformat(),
            'authors': [person_author, org_author],
            'description': 'This is the description',
        },
        'upload': {
            'files': ['YOUR FILES HERE', 'YOUR FILES HERE'],
            'directories': ['DIRECTORY CONTAINING FILES'],
        },
        'database': {
            'host_name': '<YOUR_HOST_NAME_HERE>',
            'api_key': '<YOUR_API_KEY_HERE>',
            'community_slug': '<YOUR_COMMUNITY_SLUG>',
        },
    }

    emitter = YAML()
    emitter.preserve_quotes = True

    # Round-trip the plain dict through YAML text: the reloaded document is
    # made of ruamel's own container types, which offer the
    # yaml_set_comment_before_after_key() method used below.
    buffer = StringIO()
    emitter.dump(template, buffer)
    document = emitter.load(buffer.getvalue())

    metadata_node = document['metadata']
    first_author = metadata_node["authors"][0]

    # (node, key, comment placed before that key) -- applied in this order.
    annotations = (
        (document, 'metadata', 'This is a template with minimum required descriptors and some key optionals'),
        (document, 'metadata', 'Mandatory fields are denoted with <>'),
        (document, 'metadata', 'Refer to documentation for more detail'),
        (metadata_node, 'publication_date', 'Defaults to today, of form "YYYY-MM-DD" in string format'),
        (metadata_node, 'authors', 'Can have multiple person or oraganisation as authors, edit/delete as neccesary'),
        (first_author, "identifiers", "Identifiers are optional, only orcid is supported"),
        (first_author, "identifiers", "Delete identifiers if not needed"),
        (document, 'upload', 'Specify files and directories containing files to upload here'),
        (document, 'upload', 'Edit/delete as necessary'),
        (document, 'database', 'Information for InvenioRDM database connection'),
    )
    for node, key, text in annotations:
        node.yaml_set_comment_before_after_key(key, before=text)

    with open(name, "w") as out:
        emitter.dump(document, out)
@cli.command()
@click.argument("file1")
@click.argument("file2")
@click.argument("merge_file")
def mergeXML(file1: str,file2: str, merge_file: str):
    """Merges two XML files. If repeated first-level elements are detected,
    elements from file2 will replace those in file1. Otherwise, will be appended.

    \b
    file1 (str): Path to XML file
    file2 (str): Path to XML file
    merge_file (str): Name of file where merged XML file is written to
    """
    # The docstring above is the user-facing help text; the "\b" marker has to
    # open its own paragraph (blank line before it) for Click to keep the
    # argument list un-wrapped.
    # The merge itself is delegated to XMLReader, opened on file1.
    with XMLReader(file1) as f:
        f.merge_first_level_elements_with(file2,merge_file)
@cli.command()
@click.argument("hdf5_file")
@click.argument("output_name")
def dump_to_plain_file(hdf5_file: str,output_name: str):
    """Dump HDF5 file to plain file.

    \b
    hdf5_file (str): Path to HDF5 file
    output_name (str): Name of the output file
    """
    # The docstring above is the user-facing help text; the "\b" marker has to
    # open its own paragraph (blank line before it) for Click to keep the
    # argument list un-wrapped.
    # The dump itself is delegated to HDF5Reader, opened on hdf5_file.
    with HDF5Reader(hdf5_file) as f:
        f.dump_to_plain_file(output_name)
def main():
    """Entry point: run the ``cli`` command group."""
    cli()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()