From 07aa9730818c69c88221807ce090759571eedf84 Mon Sep 17 00:00:00 2001 From: Tim Repke <repke@mcc-berlin.net> Date: Fri, 15 Dec 2023 19:01:08 +0100 Subject: [PATCH] new nql --- .flake8 | 2 +- requirements.txt | 4 +-- server/api/routes/search.py | 49 +++++++++++-------------------------- 3 files changed, 17 insertions(+), 38 deletions(-) diff --git a/.flake8 b/.flake8 index 7786610..c38856d 100644 --- a/.flake8 +++ b/.flake8 @@ -2,4 +2,4 @@ max-line-length = 122 select = C,E,F,W,B,B9 ignore = E203, E501, W503, E126 -exclude = __init__.py, venv/, config/ \ No newline at end of file +exclude = __init__.py, venv/, config/, scratch/ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f0ea220..0930da7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -fastapi==0.104.1 +fastapi==0.105.0 hypercorn==0.15.0 toml==0.10.2 email-validator==2.1.0.post1 @@ -8,4 +8,4 @@ pymitter==0.5.0 uvicorn==0.24.0.post1 python-multipart==0.0.6 aiosmtplib==3.0.1 -nacsos_data[scripts,server,utils] @ git+ssh://git@gitlab.pik-potsdam.de/mcc-apsis/nacsos/nacsos-data.git@v0.12.9 +nacsos_data[scripts,server,utils] @ git+ssh://git@gitlab.pik-potsdam.de/mcc-apsis/nacsos/nacsos-data.git@v0.12.10 diff --git a/server/api/routes/search.py b/server/api/routes/search.py index 93e2a70..f47a0f3 100644 --- a/server/api/routes/search.py +++ b/server/api/routes/search.py @@ -1,18 +1,16 @@ from typing import TYPE_CHECKING import httpx -from nacsos_data.db.crud.items.lexis_nexis import lexis_orm_to_model from nacsos_data.db.schemas import Project, ItemType from pydantic import BaseModel from fastapi import APIRouter, Depends import sqlalchemy.sql.functions as func +from sqlalchemy import select from nacsos_data.util.academic.openalex import query_async, SearchResult -from nacsos_data.db.crud.items import Query -from nacsos_data.db.crud.items.query.parse import GRAMMAR -from nacsos_data.models.items import AcademicItemModel, FullLexisNexisItemModel +from nacsos_data.models.items import AcademicItemModel, FullLexisNexisItemModel, GenericItemModel from nacsos_data.models.openalex.solr import SearchField, DefType, OpType -from sqlalchemy import select +from nacsos_data.util.nql import NQLQuery, NQLFilter from server.util.security import UserPermissionChecker, UserPermissions from server.util.logging import get_logger @@ -85,18 +83,13 @@ async def term_expansion(term_prefix: str, ] -@router.get('/nql/grammar', response_model=str) -async def nql_grammar() -> str: - return GRAMMAR - - class QueryResult(BaseModel): n_docs: int - docs: list[AcademicItemModel] | list[FullLexisNexisItemModel] + docs: list[AcademicItemModel] | list[FullLexisNexisItemModel] | list[GenericItemModel] -@router.get('/nql/query', response_model=QueryResult) -async def nql_query(query: str, +@router.post('/nql/query', response_model=QueryResult) +async def nql_query(query: NQLFilter, page: int = 1, limit: int = 20, permissions: UserPermissions = Depends(UserPermissionChecker('dataset_read'))) -> QueryResult: @@ -106,25 +99,11 @@ async def nql_query(query: str, await session.scalar(select(Project.type).where(Project.project_id == project_id))) if project_type is None: - raise KeyError() - - q = Query(query, project_id=project_id, project_type=project_type) - - stmt = q.stmt.subquery() - cnt_stmt = func.count(stmt.c.item_id) - - if project_type == ItemType.academic: - docs = [AcademicItemModel.model_validate(item.__dict__) - for item in (await session.execute(q.stmt - .offset((page - 1) * limit) - .limit(limit))).scalars().all()] - elif project_type == ItemType.lexis: - docs = lexis_orm_to_model((await session.execute(q.stmt - .offset((page - 1) * limit) - .limit(limit))).mappings().all()) - else: - raise NotImplementedError() - return QueryResult( - n_docs=(await session.execute(cnt_stmt)).scalar(), # type: ignore[arg-type] - docs=docs - ) + raise KeyError(f'Found no matching project for {project_id}. This should NEVER happen!') + + nql = NQLQuery(query, project_id=str(project_id), project_type=project_type) + + n_docs = (await session.execute(func.count(nql.stmt.subquery().c.item_id))).scalar() + docs = await nql.results_async(session=session, limit=limit, offset=(page - 1) * limit) + + return QueryResult(n_docs=n_docs, docs=docs) # type: ignore[arg-type] -- GitLab