Skip to content
Snippets Groups Projects
Commit 59df3414 authored by Tim Repke's avatar Tim Repke
Browse files

add buscar

parent 4e191e01
No related branches found
No related tags found
1 merge request: !60 Master
flake8==6.1.0
tox==4.6.4
pytest==7.4.0
pytest==7.4.2
pytest-cov==4.1.0
mypy==1.4.1
mypy==1.5.1
types-toml==0.10.8.7
types-PyYAML==6.0.12.11
\ No newline at end of file
......@@ -18,7 +18,8 @@ logger = get_logger('nacsos.server')
app = FastAPI(openapi_url=settings.SERVER.OPENAPI_FILE,
openapi_prefix=settings.SERVER.OPENAPI_PREFIX,
root_path=settings.SERVER.ROOT_PATH)
root_path=settings.SERVER.ROOT_PATH,
separate_input_output_schemas=False)
logger.debug('Setting up server and middlewares')
mimetypes.add_type('application/javascript', '.js')
......
......@@ -11,6 +11,7 @@ from .routes import highlight
from .routes import stats
from .routes import export
from .routes import search
from .routes import evaluation
# this router proxies all /api endpoints
router = APIRouter()
......@@ -50,3 +51,6 @@ router.include_router(export.router, prefix='/export', tags=['export'])
# route for searching data (e.g. in openalex)
router.include_router(search.router, prefix='/search', tags=['search'])
# route for computing evaluation metrics and other statistics
router.include_router(evaluation.router, prefix='/eval', tags=['evaluation'])
......@@ -5,76 +5,83 @@ from sqlalchemy import select
from sqlalchemy.orm import load_only
from fastapi import APIRouter, Depends, HTTPException, status as http_status, Query
from nacsos_data.db.schemas import \
BotAnnotationMetaData, \
AssignmentScope, \
User, \
from nacsos_data.db.schemas import (
BotAnnotationMetaData,
AssignmentScope,
User,
Annotation
from nacsos_data.models.annotations import \
AnnotationSchemeModel, \
AssignmentScopeModel, \
AssignmentModel, \
AssignmentStatus, \
AssignmentScopeConfig, \
AnnotationSchemeModelFlat, \
FlattenedAnnotationSchemeLabel
from nacsos_data.models.bot_annotations import \
ResolutionMethod, \
AnnotationFilters, \
BotAnnotationModel, \
AnnotationCollection, \
BotMetaResolve, \
GroupedBotAnnotation, \
AnnotationCollectionDB, \
BotKind, \
BotAnnotationMetaDataBaseModel
)
from nacsos_data.models.annotations import (
AnnotationSchemeModel,
AssignmentScopeModel,
AssignmentModel,
AssignmentStatus,
AssignmentScopeConfig,
AnnotationSchemeModelFlat
)
from nacsos_data.models.bot_annotations import (
BotKind,
BotAnnotationMetaDataBaseModel,
BotAnnotationResolution,
ResolutionMatrix,
BotMetaResolveBase,
ResolutionProposal
)
from nacsos_data.models.users import UserModel
from nacsos_data.models.items import AnyItemModel
from nacsos_data.db.crud.items import read_any_item_by_item_id
from nacsos_data.db.crud.projects import read_project_by_id
from nacsos_data.db.crud.annotations import \
read_assignment, \
read_assignments_for_scope, \
read_assignments_for_scope_for_user, \
read_assignment_scopes_for_project, \
read_assignment_scopes_for_project_for_user, \
read_annotations_for_assignment, \
read_next_assignment_for_scope_for_user, \
read_next_open_assignment_for_scope_for_user, \
read_annotation_scheme, \
read_annotation_schemes_for_project, \
upsert_annotations, \
read_assignment_scope, \
upsert_annotation_scheme, \
delete_annotation_scheme, \
upsert_assignment_scope, \
delete_assignment_scope, \
read_item_ids_with_assignment_count_for_project, \
read_assignment_counts_for_scope, \
ItemWithCount, \
AssignmentCounts, \
UserProjectAssignmentScope, \
store_assignments, \
store_resolved_bot_annotations, \
update_resolved_bot_annotations, read_assignment_overview_for_scope, AssignmentScopeEntry
from nacsos_data.util.annotations.resolve import \
AnnotationFilterObject, \
get_resolved_item_annotations, \
read_bot_annotations, ResolutionProposal
from nacsos_data.util.annotations.validation import \
merge_scheme_and_annotations, \
annotated_scheme_to_annotations, \
from nacsos_data.db.crud.annotations import (
read_assignment,
read_assignments_for_scope,
read_assignments_for_scope_for_user,
read_assignment_scopes_for_project,
read_assignment_scopes_for_project_for_user,
read_annotations_for_assignment,
read_next_assignment_for_scope_for_user,
read_next_open_assignment_for_scope_for_user,
read_annotation_schemes_for_project,
upsert_annotations,
read_assignment_scope,
upsert_annotation_scheme,
delete_annotation_scheme,
upsert_assignment_scope,
delete_assignment_scope,
read_item_ids_with_assignment_count_for_project,
read_assignment_counts_for_scope,
ItemWithCount,
AssignmentCounts,
UserProjectAssignmentScope,
store_assignments,
store_resolved_bot_annotations,
update_resolved_bot_annotations,
read_assignment_overview_for_scope,
AssignmentScopeEntry,
read_resolved_bot_annotations,
read_resolved_bot_annotation_meta,
read_resolved_bot_annotations_for_meta
)
from nacsos_data.util.annotations.resolve import (
AnnotationFilterObject,
get_resolved_item_annotations,
read_annotation_scheme
)
from nacsos_data.util.annotations.validation import (
merge_scheme_and_annotations,
annotated_scheme_to_annotations,
flatten_annotation_scheme
)
from nacsos_data.util.annotations.assignments.random import random_assignments
from nacsos_data.util.annotations.assignments.random_exclusion import random_assignments_with_exclusion
from server.api.errors import \
SaveFailedError, \
AssignmentScopeNotFoundError, \
NoNextAssignmentWarning, \
ProjectNotFoundError, \
AnnotationSchemeNotFoundError, \
from server.api.errors import (
SaveFailedError,
AssignmentScopeNotFoundError,
NoNextAssignmentWarning,
ProjectNotFoundError,
AnnotationSchemeNotFoundError,
MissingInformationError
)
from server.util.security import UserPermissionChecker
from server.data import db_engine
......@@ -108,7 +115,8 @@ async def get_scheme_definition(annotation_scheme_id: str,
:param permissions:
:return: a single annotation scheme
"""
scheme = await read_annotation_scheme(annotation_scheme_id=annotation_scheme_id, db_engine=db_engine)
scheme: AnnotationSchemeModel | None = await read_annotation_scheme(annotation_scheme_id=annotation_scheme_id,
db_engine=db_engine)
if scheme is not None:
if flat:
return flatten_annotation_scheme(scheme)
......@@ -415,104 +423,93 @@ async def get_annotators_for_scheme(scheme_id: str,
.where(Annotation.annotation_scheme_id == scheme_id))).scalars().all()]
class SavedResolutionResponse(BaseModel):
name: str
meta: BotMetaResolve
saved: dict[str, list[GroupedBotAnnotation]]
@router.get('/config/resolve/', response_model=ResolutionProposal)
async def get_resolved_annotations(strategy: ResolutionMethod,
scheme_id: str,
scope_id: list[str] | None = Query(default=None),
user_id: list[str] | None = Query(default=None),
key: list[str] | None = Query(default=None),
repeat: list[int] | None = Query(default=None),
ignore_order: bool | None = Query(default=False),
ignore_hierarchy: bool | None = Query(default=False),
async def get_resolved_annotations(settings: BotMetaResolveBase,
include_empty: bool | None = Query(default=False),
existing_resolution: str | None = Query(default=None),
include_new: bool | None = Query(default=False),
update_existing: bool | None = Query(default=False),
permissions=Depends(UserPermissionChecker('annotations_edit'))):
permissions=Depends(UserPermissionChecker('annotations_edit'))) \
-> ResolutionProposal:
"""
Get all annotations that match the filters (e.g. all annotations made by users in scope with :scope_id).
Annotations are returned in a 3D matrix:
rows (dict entries): items (key: item_id)
columns (list index of dict entry): Label (key in scheme + repeat); index map in matrix.keys
cells: list of annotations by each user for item/Label combination
:param include_new:
:param update_existing:
:param existing_resolution:
:param include_empty:
:param strategy
:param scheme_id:
:param scope_id:
:param user_id:
:param key:
:param repeat:
:param settings
:param permissions:
:param ignore_order:
:param ignore_hierarchy:
:return:
"""
filters = AnnotationFilters(
scheme_id=scheme_id,
scope_id=scope_id,
user_id=user_id,
key=key,
repeat=repeat,
)
if ignore_hierarchy is None:
ignore_hierarchy = False
if ignore_order is None:
ignore_order = False
if include_empty is None:
include_empty = True
if include_new is None:
include_new = False
if update_existing is None:
update_existing = False
return await get_resolved_item_annotations(strategy=strategy,
filters=AnnotationFilterObject.model_validate(filters.model_dump()),
ignore_order=ignore_order,
ignore_hierarchy=ignore_hierarchy,
if existing_resolution is not None:
return await read_resolved_bot_annotations(db_engine=db_engine,
existing_resolution=existing_resolution,
include_new=include_new,
include_empty=include_empty,
update_existing=update_existing)
filters = AnnotationFilterObject.model_validate(settings.filters)
return await get_resolved_item_annotations(strategy=settings.algorithm,
filters=filters,
ignore_repeat=settings.ignore_repeat,
ignore_hierarchy=settings.ignore_hierarchy,
include_new=include_new,
include_empty=include_empty,
update_existing=update_existing,
existing_resolution=existing_resolution,
db_engine=db_engine)
class ResolutionPayload(BaseModel):
name: str
strategy: ResolutionMethod
filters: AnnotationFilters
ignore_order: bool # Refers to `annotation.repeat`, not `assignment.order`!
ignore_hierarchy: bool
collection: AnnotationCollectionDB
bot_annotations: list[BotAnnotationModel]
class SavedResolution(BaseModel):
    """Response model of `get_saved_resolved_annotations`: a stored resolution's meta entry plus its proposal."""
    # meta entry of the saved resolution (what `read_resolved_bot_annotation_meta` returned)
    meta: BotAnnotationResolution
    # proposal read for that meta entry via `read_resolved_bot_annotations_for_meta`
    proposal: ResolutionProposal
@router.get('/config/resolved/{bot_annotation_meta_id}', response_model=SavedResolution)
async def get_saved_resolved_annotations(
        bot_annotation_metadata_id: str,
        permissions=Depends(UserPermissionChecker('annotations_edit')),
) -> SavedResolution:
    """
    Return a previously saved resolution: its meta entry and the stored proposal.

    The proposal is read as-is (`include_new`, `include_empty` and `update_existing`
    are all passed as `False`).

    NOTE(review): the path placeholder is `bot_annotation_meta_id`, but the function
    parameter is `bot_annotation_metadata_id` -- presumably the id is therefore read
    from the query string rather than from the path; confirm against the client.

    :param bot_annotation_metadata_id: id of the saved resolution to load
    :param permissions: injected by `UserPermissionChecker('annotations_edit')`
    :return: meta entry and proposal wrapped in a `SavedResolution`
    """
    async with db_engine.session() as session:  # type: AsyncSession
        # Both reads share one session.
        meta_entry = await read_resolved_bot_annotation_meta(
            session=session,
            bot_annotation_metadata_id=bot_annotation_metadata_id,
        )
        stored_proposal = await read_resolved_bot_annotations_for_meta(
            session=session,
            bot_meta=meta_entry,
            include_new=False,
            include_empty=False,
            update_existing=False,
        )
        return SavedResolution(meta=meta_entry, proposal=stored_proposal)
@router.put('/config/resolve/', response_model=str)
async def save_resolved_annotations(data: ResolutionPayload,
async def save_resolved_annotations(settings: BotMetaResolveBase,
matrix: ResolutionMatrix,
name: str,
permissions=Depends(UserPermissionChecker('annotations_edit'))):
meta_id = await store_resolved_bot_annotations(
project_id=permissions.permissions.project_id, name=data.name, algorithm=data.strategy,
filters=data.filters, ignore_hierarchy=data.ignore_hierarchy, ignore_repeat=data.ignore_order,
collection=data.collection, bot_annotations=data.bot_annotations, db_engine=db_engine)
meta_id = await store_resolved_bot_annotations(db_engine=db_engine,
project_id=permissions.permissions.project_id,
name=name,
algorithm=settings.algorithm,
filters=settings.filters,
ignore_hierarchy=settings.ignore_hierarchy,
ignore_repeat=settings.ignore_repeat,
matrix=matrix)
return meta_id
@router.put('/config/resolve/update')
async def update_resolved_annotations(bot_annotation_metadata_id: str,
name: str,
bot_annotations: list[BotAnnotationModel],
matrix: ResolutionMatrix,
permissions=Depends(UserPermissionChecker('annotations_edit'))) -> None:
# TODO: allow update of filters and settings?
await update_resolved_bot_annotations(bot_annotation_metadata_id=bot_annotation_metadata_id,
name=name, bot_annotations=bot_annotations, db_engine=db_engine)
name=name, matrix=matrix, db_engine=db_engine)
@router.get('/config/resolved-list/', response_model=list[BotAnnotationMetaDataBaseModel])
......@@ -535,29 +532,15 @@ async def list_saved_resolved_annotations(permissions=Depends(UserPermissionChec
return [BotAnnotationMetaDataBaseModel.model_validate(e.__dict__) for e in exports]
@router.get('/config/resolved/{bot_annotation_meta_id}', response_model=SavedResolutionResponse)
async def get_saved_resolved_annotations(bot_annotation_metadata_id: str,
permissions=Depends(UserPermissionChecker('annotations_edit'))):
bot_annotations = await read_bot_annotations(bot_annotation_metadata_id=bot_annotation_metadata_id,
db_engine=db_engine)
async with db_engine.session() as session: # type: AsyncSession
meta: BotAnnotationMetaData = (await session.execute(
select(BotAnnotationMetaData)
.where(BotAnnotationMetaData.bot_annotation_metadata_id == bot_annotation_metadata_id))) \
.scalars().one()
return SavedResolutionResponse(
name=meta.name,
meta=meta.meta,
saved=bot_annotations
)
@router.delete('/config/resolved/{bot_annotation_meta_id}')
async def delete_saved_resolved_annotations(bot_annotation_metadata_id: str,
permissions=Depends(UserPermissionChecker('annotations_edit'))):
async with db_engine.session() as session: # type: AsyncSession
meta: BotAnnotationMetaData = (await session.execute(
meta: BotAnnotationMetaData | None = (await session.execute(
select(BotAnnotationMetaData)
.where(BotAnnotationMetaData.bot_annotation_metadata_id == bot_annotation_metadata_id))) \
.scalars().one()
await session.delete(meta)
.scalars().one_or_none()
if meta is not None:
await session.delete(meta)
# TODO: do we need to commit?
# TODO: ensure bot_annotations are deleted via cascade
import uuid
from typing import TYPE_CHECKING
from fastapi import APIRouter, BackgroundTasks, Depends
from nacsos_data.db.crud import upsert_orm
from nacsos_data.db.schemas import AnnotationTracker
from nacsos_data.models.annotation_tracker import AnnotationTrackerModel
from nacsos_data.util.annotations.evaluation import get_new_label_batches
from nacsos_data.util.annotations.evaluation.buscar import (
calculate_h0s_for_batches,
compute_recall,
calculate_h0s)
from nacsos_data.util.annotations.evaluation.label_transform import annotations_to_sequence, get_annotations
from nacsos_data.util.auth import UserPermissions
from sqlalchemy import select
from server.data import db_engine
from server.api.errors import DataNotFoundWarning
from server.util.logging import get_logger
from server.util.security import UserPermissionChecker
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncSession # noqa F401
logger = get_logger('nacsos.api.route.eval')
logger.debug('Setup nacsos.api.route.eval router')
router = APIRouter()
async def read_tracker(session: 'AsyncSession', tracker_id: str | uuid.UUID,
                       project_id: str | uuid.UUID | None = None) -> AnnotationTracker:
    """
    Load a single `AnnotationTracker` row by its `annotation_tracking_id`.

    The `session` annotation is a string on purpose: `AsyncSession` is only imported
    under `TYPE_CHECKING`, so an unquoted annotation would be evaluated when the
    function is defined and raise a `NameError` as soon as this module is imported.

    :param session: open database session used to run the query
    :param tracker_id: value compared against `AnnotationTracker.annotation_tracking_id`
    :param project_id: only used in the error message.
        NOTE(review): the query itself does not filter by project -- confirm whether
        trackers should be restricted to the caller's project here.
    :return: the matching tracker
    :raises DataNotFoundWarning: if no tracker matches `tracker_id`
    """
    stmt = (select(AnnotationTracker)
            .where(AnnotationTracker.annotation_tracking_id == tracker_id))
    tracker = (await session.scalars(stmt)).one_or_none()
    if tracker is None:
        raise DataNotFoundWarning(f'No Tracker in project {project_id} for id {tracker_id}!')
    return tracker
@router.get('/tracking/tracker/{tracker_id}', response_model=AnnotationTrackerModel)
async def get_tracker(tracker_id: str,
                      permissions: UserPermissions = Depends(UserPermissionChecker('annotations_read'))) \
        -> AnnotationTrackerModel:
    """
    Return the tracker with the given id.

    :param tracker_id: id of the tracker to load
    :param permissions: injected by `UserPermissionChecker('annotations_read')`;
        its project id is forwarded to `read_tracker`
    :return: the tracker, validated into an `AnnotationTrackerModel`
    :raises DataNotFoundWarning: raised by `read_tracker` if no tracker matches
    """
    async with db_engine.session() as session:  # type: AsyncSession
        # `read_tracker` is a coroutine function: it has to be awaited so that the loaded
        # row (and not a pending coroutine object) is handed to `model_validate`.
        tracker = await read_tracker(tracker_id=tracker_id, session=session,
                                     project_id=permissions.permissions.project_id)
        return AnnotationTrackerModel.model_validate(tracker)
@router.put('/tracking/tracker', response_model=str)
async def save_tracker(tracker: AnnotationTrackerModel,
                       permissions: UserPermissions = Depends(UserPermissionChecker('annotations_read'))) -> str:
    """
    Create or update a tracker via `upsert_orm` and return its primary key as a string.

    `labels`, `recall` and `buscar` are passed as `skip_update` -- presumably so that
    values computed by the refresh endpoint are not overwritten by the client payload
    (confirm against `upsert_orm`).

    NOTE(review): this endpoint writes data but only requires `annotations_read`;
    confirm whether `annotations_edit` was intended.

    :param tracker: tracker definition sent by the client
    :param permissions: injected by `UserPermissionChecker('annotations_read')`
    :return: primary key (`annotation_tracking_id`) of the stored tracker
    """
    computed_fields = ['labels', 'recall', 'buscar']
    stored_key = await upsert_orm(upsert_model=tracker,
                                  Schema=AnnotationTracker,
                                  primary_key='annotation_tracking_id',
                                  db_engine=db_engine,
                                  skip_update=computed_fields)
    return str(stored_key)
@router.post('/tracking/refresh', response_model=AnnotationTrackerModel)
async def update_tracker(tracker_id: str,
                         background_tasks: BackgroundTasks,
                         batch_size: int | None = None,
                         reset: bool = False,
                         permissions: UserPermissions = Depends(UserPermissionChecker('annotations_edit'))) \
        -> AnnotationTrackerModel:
    """
    Rebuild the label sequences of a tracker and schedule the recomputation of its scores.

    One batch of labels is derived per entry in `tracker.source_ids`. The new batches are
    stored on the tracker and committed; `bg_populate_tracker` is then queued as a
    background task to fill in `recall` and `buscar`.

    :param tracker_id: id of the tracker to refresh
    :param background_tasks: FastAPI task queue the score computation is added to
    :param batch_size: forwarded to `bg_populate_tracker`
    :param reset: if true, `buscar` and `recall` are cleared and no label diff is computed
    :param permissions: injected by `UserPermissionChecker('annotations_edit')`
    :return: the tracker as it is after the labels were updated
    :raises DataNotFoundWarning: raised by `read_tracker` if no tracker matches
    """
    async with db_engine.session() as session:  # type: AsyncSession
        tracker = await read_tracker(tracker_id=tracker_id, session=session,
                                     project_id=permissions.permissions.project_id)

        # Fetch the annotations for every source separately, so each source forms one batch.
        annotations_per_source = []
        for source_id in tracker.source_ids:
            annotations_per_source.append(await get_annotations(session=session, source_ids=[source_id]))

        # Translate each batch of annotations into its label sequence.
        label_batches = []
        for source_annotations in annotations_per_source:
            label_batches.append(annotations_to_sequence(tracker.inclusion_rule,
                                                         annotations=source_annotations,
                                                         majority=tracker.majority))

        new_batches: list[list[int]] | None = None
        if reset:
            # Drop all previously computed scores.
            tracker.buscar = None
            tracker.recall = None
        elif tracker.labels is not None:
            new_batches = get_new_label_batches(tracker.labels, label_batches)

        # Persist the refreshed labels before the background task is scheduled.
        tracker.labels = label_batches
        await session.commit()

        # Only plain values are handed over; the task loads the tracker again in its own
        # session, because this session is not persistent.
        background_tasks.add_task(bg_populate_tracker, tracker_id, batch_size, new_batches)

        return AnnotationTrackerModel.model_validate(tracker)
async def bg_populate_tracker(tracker_id: str, batch_size: int | None = None, labels: list[list[int]] | None = None):
    """
    Background task: compute recall and buscar scores for a tracker and store them.

    Results are committed step by step, so partial results become visible while the
    computation is still running.

    :param tracker_id: id of the tracker to populate
    :param batch_size: if `None`, `calculate_h0s_for_batches` is run on `tracker.labels`;
        otherwise `calculate_h0s` is run on the flattened labels with this step size
    :param labels: batches of labels to process; falls back to `tracker.labels` if `None`
    :raises DataNotFoundWarning: raised by `read_tracker` if no tracker matches
    """
    async with db_engine.session() as session:  # type: AsyncSession
        tracker = await read_tracker(tracker_id=tracker_id, session=session)

        if labels is None:
            labels = tracker.labels

        flat_labels = [lab for batch in labels for lab in batch]

        recall = compute_recall(labels_=flat_labels)
        if tracker.recall is None:
            tracker.recall = recall
        else:
            # Assign a new object instead of `+=`: an in-place extension leaves the attribute
            # pointing at the same (mutated) object, which the ORM may not register as a
            # change, so the commit below could silently skip the update. This mirrors how
            # `tracker.buscar` is extended further down.
            tracker.recall = tracker.recall + recall
        await session.commit()

        # Initialise buscar scores
        if tracker.buscar is None:
            tracker.buscar = []

        # NOTE(review): when `labels` is passed in, the fixed-step branch only sees those
        # labels while the batch branch always uses the full `tracker.labels` -- confirm
        # that this difference is intended.
        if batch_size is None:
            # Use scopes as batches
            it = calculate_h0s_for_batches(labels_=tracker.labels,
                                           recall_target=tracker.recall_target,
                                           n_docs=tracker.n_items_total)
        else:
            # Ignore the batches derived from scopes and use fixed step sizes
            it = calculate_h0s(labels_=flat_labels,
                               batch_size=batch_size,
                               recall_target=tracker.recall_target,
                               n_docs=tracker.n_items_total)

        for x, y in it:
            tracker.buscar = tracker.buscar + [(x, y)]
            # save after each step, so the user can refresh the page and get data as it becomes available
            await session.commit()
......@@ -46,7 +46,7 @@ async def get_annotations_csv(labels: list[LabelOptions],
assignment_scope_ids: list[str] | None = Query(default=None),
user_ids: list[str] | None = Query(default=None),
ignore_hierarchy: bool = Query(default=True),
ignore_order: bool = Query(default=True),
ignore_repeat: bool = Query(default=True),
item_fields: list[str] | None = Query(default=None),
permissions: UserPermissions = Depends(UserPermissionChecker('annotations_read'))):
result = await prepare_export_table(bot_annotation_metadata_ids=bot_annotation_metadata_ids,
......@@ -54,7 +54,7 @@ async def get_annotations_csv(labels: list[LabelOptions],
user_ids=user_ids,
project_id=permissions.permissions.project_id,
labels=labels,
ignore_order=ignore_order,
ignore_repeat=ignore_repeat,
ignore_hierarchy=ignore_hierarchy,
item_fields=item_fields,
db_engine=db_engine)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment