class DatasetLoader(ABC):
    """Abstract base class for dataset loaders backed by a local JSONL cache.

    Subclasses set the metadata attributes below and implement
    :meth:`download`. :meth:`load` calls ``download()`` only when no cache
    file exists yet, then stores the result at
    ``~/.ai-blackteam/datasets/{name}.jsonl`` for later calls.
    """

    # Metadata reported by info(); ``name`` also determines the cache file name.
    name: str = ""
    license: str = ""
    source_url: str = ""
    description: str = ""

    def cache_path(self):
        """Return the cache file path ``~/.ai-blackteam/datasets/{name}.jsonl``.

        Returns:
            A ``pathlib.Path``; the file (and its parent directory) may not
            exist yet.
        """
        # Function-scope import: the module's import block is not visible here.
        from pathlib import Path

        return Path.home() / ".ai-blackteam" / "datasets" / f"{self.name}.jsonl"

    def is_cached(self) -> bool:
        """Return True if the cache file exists."""
        return self.cache_path().is_file()

    def save_cache(self, items) -> None:
        """Write *items* to the cache file as JSONL (one JSON document per line).

        The cache directory is created if it is missing; an existing cache
        file is overwritten.

        Args:
            items: Iterable of JSON-serialisable records.
        """
        import json

        path = self.cache_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("w", encoding="utf-8") as fh:
            fh.writelines(json.dumps(item) + "\n" for item in items)

    def load_cache(self) -> list[dict]:
        """Read every record from the JSONL cache file.

        Returns:
            The decoded records in file order.

        Raises:
            FileNotFoundError: If the cache file does not exist.
        """
        import json

        with self.cache_path().open(encoding="utf-8") as fh:
            # Skip blank lines so a trailing newline does not break decoding.
            return [json.loads(line) for line in fh if line.strip()]

    @abstractmethod
    def download(self) -> list[dict]:
        """Fetch the dataset from its source; implemented by each subclass."""
        ...

    def load(self) -> list[dict]:
        """Return the dataset items, caching them on the first call.

        If a cache file exists it is read and ``download()`` is not called;
        otherwise the items are downloaded, written to the cache, and returned.
        """
        if self.is_cached():
            return self.load_cache()
        items = self.download()
        self.save_cache(items)
        return items

    def info(self) -> dict:
        """Return dataset metadata (used by ``dataset list``).

        ``count`` is the number of cached records, or ``None`` when nothing
        is cached.
        """
        # Evaluate once so "cached" and "count" cannot disagree.
        cached = self.is_cached()
        return {
            "name": self.name,
            "license": self.license,
            "source": self.source_url,
            "description": self.description,
            "cached": cached,
            "count": len(self.load_cache()) if cached else None,
        }