#!/usr/bin/env python2 from __future__ import print_function import atexit import json import os import shlex import subprocess import sys import time from six.moves.urllib.error import URLError, HTTPError from six.moves.urllib.parse import quote from six.moves.urllib.request import Request, urlopen # TODO(documentation, distribute) class RepositoryException(BaseException): def __init__(self, code, message): super(RepositoryException, self).__init__('code=%d, message="%s"' % (code, message)) self.code = code self.message = message class Client(object): def __init__(self, host='127.0.0.1', port=8080): self.host = host self.port = port self.version = '20160107-1526' # up to date with that repository version def load(self, method, url, data=None, keep_data=False, content_type='text/plain'): url = "http://%s:%d%s" % (self.host, self.port, url) if data is None: request = Request(url) else: if method == 'GET': url += '?' if isinstance(data, dict): url += '&'.join(['%s=%s' % (quote(str(name)), quote(str(value))) for name, value in data.items()]) elif isinstance(data, list): url += '&'.join(['%s' % (quote(str(name)), quote(str(value))) for name, value in data]) elif isinstance(data, str): url += data request = Request(url) elif keep_data: request = Request(url, data=data.encode('utf8'), headers={'Content-Type': content_type}) else: request = Request(url, data=json.dumps(data).encode('utf8'), headers={'Content-Type': 'application/json'}) request.get_method = lambda: method handler = urlopen(request) result = json.loads(handler.read().decode('utf8')) if result['success'] is False: raise RepositoryException(result['code'], result['result']) return result['result'] def get_documents(self): return self.load('GET', '/repository/documents') def get_document(self, docid): return self.load('GET', '/repository/document/%s' % docid) def get_content(self, docid): return self.load('GET', '/repository/document/%s/content' % docid) def get_features(self, docid, feature_names=None): if feature_names is None: return self.load('GET', '/repository/document/%s/features' % docid) else: return self.load('POST', '/repository/document/%s/features' % docid, feature_names) def query_features(self, key_value_pairs): return self.load('GET', '/repository/documents/features', key_value_pairs) def has_feature(self, feature_name, num=10): return self.load('GET', '/repository/documents/present/feature/%s/%d' % (feature_name, num)) def missing_feature(self, feature_name, num=10): return self.load('GET', '/repository/documents/missing/feature/%s/%d' % (feature_name, num)) def false_feature(self, feature_name, num=10): return self.load('GET', '/repository/documents/false/feature/%s/%d' % (feature_name, num)) def get_documents_complex(self, query): return self.load('GET', '/repository/doc-feature-query/ids', query) def get_documents_complex_full(self, query): return self.load('GET', '/repository/doc-feature-query/full', query) def delete_feature(self, docid, feature): return self.load('DELETE', '/repository/document/%s/feature/%s' % (docid, feature)) def delete_features(self, docid, features): return self.load('DELETE', '/repository/document/%s/features' % docid, features) def delete_document(self, docid): return self.load('DELETE', '/repository/document/%s' % docid) def delete_annotation_set(self, docid, name): return self.load('DELETE', '/repository/document/%s/annotation-set/%s' % (docid, name)) def delete_annotation(self, docid, name, annotation_id): return self.load('DELETE', '/repository/document/%s/annotation-set/%s/%s' % (docid, name, annotation_id)) def post_document(self, content): return self.load('POST', '/repository/document', content) def post_document_websays(self, content): return self.load('POST', '/repository/document/websays', content, keep_data=True, content_type='application/xml') def post_document_plaintext(self, content): return self.load('POST', '/repository/document/plaintext', content, keep_data=True, content_type='text/plain') def test_doc(self): return self.load('GET', '/repository/test-doc') def test_doc_str(self, content): return self.load('GET', '/repository/test-doc/%s' % content) def test_doc_post(self, content): return self.load('POST', '/repository/test-doc', content, keep_data=True, content_type='text/plain') def put_features(self, docid, features): return self.load('PUT', '/repository/document/%d/features' % docid, features) def put_annotations(self, docid, annotations): return self.load('PUT', '/repository/document/%d/annotations' % docid, annotations) def put_annotation_set(self, docid, name, content): return self.load('PUT', '/repository/document/%d/annotation-set/%s' % (docid, name), content) class AnnotationGenerator(object): def __init__(self, query): self.query = query def process_document(self, client, document): #print(document['id']) raise NotImplementedError def run(self, client, delay=300): print('waiting for more documents') while True: documents = client.get_documents_complex_full(self.query) if len(documents) > 0: for document in documents: self.process_document(client, document) print('waiting for more documents') else: time.sleep(delay) def test_repository(client): """Run tests on the repository client.""" print(len(client.get_documents())) doc = client.test_doc() print(doc) print(client.get_document(doc)) print(client.get_content(doc)) print(client.get_features(doc)) print(client.get_features(doc, ['testDocument'])) print(client.query_features({"provenance": "testDoc"})) print(client.has_feature('testDocument', 2)) print(client.missing_feature('Websays_sourceType', 2)) print(client.false_feature('Websays_isBestComment', 2)) print(client.get_documents_complex({"testDocument": True})) print(client.get_documents_complex_full({"testDocument": True})) print(client.delete_feature(doc, "misc")) print(client.delete_features(doc, ["junk"])) print(client.delete_annotation_set(doc, 'all')) feature = client.get_document(doc)['annotations']['split'][0]['id'] print(feature) print(client.delete_annotation(doc, 'split', feature)) print(client.put_features(doc, {"x": "y"})) print(client.put_annotations(doc, {"w": []})) print(client.put_annotation_set(doc, 'z', [])) print(client.delete_document(doc)) doc = client.post_document({"name": "x", "content": "x"}) print(client.delete_document(doc)) doc = client.post_document_websays('<senseiClipping><text>x</text></senseiClipping>')[0] print(client.delete_document(doc)) doc = client.post_document_plaintext('x') print(client.delete_document(doc)) def test_annotator(client): annotator = AnnotationGenerator("x"); annotator.run(client) tunnel = None def start_tunnel(): """Create a tunnel to the SENSEI repository. This assumes that you have a ssh key setup to access that tunnel. """ global tunnel if tunnel is None: print('starting tunnel') tunnel = subprocess.Popen('ssh -i ~/.ssh/saphir.id_rsa -N -L8080:139.124.22.35:8080 sensei-proto@saphir2.lidil.univ-mrs.fr 2>&1 ', env=os.environ, shell=True, stdout=sys.stdout, stderr=sys.stderr, stdin=None) def stop_tunnel(): """Stop the running tunnel""" global tunnel if tunnel is not None: print('stopping tunnel') tunnel.terminate() tunnel.communicate() tunnel.wait() tunnel = None atexit.register(stop_tunnel) if __name__ == '__main__': # command-line client import argparse parser = argparse.ArgumentParser(description='Command line repository', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--host', type=str, help='host for connection', default='127.0.0.1') parser.add_argument('--port', type=int, help='port for connection', default=8080) parser.add_argument('--test', action='store_true', help='run a battery of tests using the repository client') parser.add_argument('--test_annotator', action='store_true', help='run tests on the annotation generator') parser.add_argument('--tunnel', action='store_true', help='creates a tunnel to the sensei repository') parser.add_argument('--get_documents', action='store_true', help='list documents') parser.add_argument('--get_document', type=int, help='get document from id') parser.add_argument('--get_content', type=int, help='get document content from id') parser.add_argument('--get_features', type=int, help='get all features from existing document, can be used in conjunction with --list') parser.add_argument('--list', type=str, help='json-encoded list of features to retrieve') parser.add_argument('--query_features', type=str, help='get documents that match a feature query, descibed as a json dictionary') parser.add_argument('--num', type=int, help='number of documents to get when querying features', default=10) parser.add_argument('--has_feature', type=str, help='get documents that have a given feature (use with --num)') parser.add_argument('--missing_feature', type=str, help='get documents missing some feature (use with --num)') parser.add_argument('--false_feature', type=str, help='get documents where those features are false (use with --num)') parser.add_argument('--get_documents_complex', type=str, help='get document ids from complex query (specified as json dict)') parser.add_argument('--get_documents_complex_full', type=str, help='get full documents from complex query (specified as json dict)') parser.add_argument('--delete_feature', type=str, nargs=2, help='delete feature from a document (specify docid, feature name)') parser.add_argument('--delete_features', type=str, nargs=2, help='delete features from a document (specify docid, then json encoded list of features)') parser.add_argument('--delete_document', type=int, help='delete a document') parser.add_argument('--delete_annotation_set', type=str, nargs=2, help='delete annotation sets from a document (specify docid, then name of annotation set)') parser.add_argument('--delete_annotation', type=str, nargs=3, help='delete annotation from a document (specify docid, name of annotation set, and annotation id)') parser.add_argument('--post_document', type=str, help='post a new document in json format (filename or "-" for stdin)') parser.add_argument('--post_document_websays', type=str, help='post a new document in websays xml (filename or "-" for stdin)') parser.add_argument('--post_document_plaintext', type=str, help='post a new document in plain text (filename or "-" for stdin)') parser.add_argument('--test_doc', action='store_true', help='create a new test document and return its id') parser.add_argument('--test_doc_str', type=str, help='create a new test document of a given content and return its id') parser.add_argument('--test_doc_post', type=str, help='create a new test document with arbitrary features and annotations, and with a given content, and return its id') parser.add_argument('--put_features', type=str, nargs=2, help='add features to existing document (specify docid, and features as json dict)') parser.add_argument('--put_annotations', type=str, nargs=2, help='add features to existing document (specify docid, and annotations as json dict)') parser.add_argument('--put_annotation_set', type=str, nargs=3, help='add features to existing document (specify docid, set name as string, and annotations as json dict)') args = parser.parse_args() if args.tunnel: start_tunnel() client = Client(host=args.host, port=args.port) if args.test: test_repository(client) if args.test_annotator: test_annotator(client) elif args.get_documents: print(' '.join([str(x) for x in client.get_documents()])) elif args.get_document: print(json.dumps(client.get_document(int(args.get_document)), indent=4)) elif args.get_content: print(json.dumps(client.get_content(int(args.get_content)), indent=4)) elif args.get_features: if args.list: print(json.dumps(client.get_features(int(args.get_features), json.loads(args.list)), indent=4)) else: print(json.dumps(client.get_features(int(args.get_features)), indent=4)) elif args.query_features: print(json.dumps(client.query_features(json.loads(args.query_features)), indent=4)) elif args.has_feature: print(json.dumps(client.has_feature(args.has_feature, args.num), indent=4)) elif args.missing_feature: print(json.dumps(client.missing_feature(args.missing_feature, args.num), indent=4)) elif args.false_feature: print(json.dumps(client.false_feature(args.false_feature, args.num), indent=4)) elif args.get_documents_complex: print(json.dumps(client.get_documents_complex(json.loads(args.get_documents_complex)), indent=4)) elif args.get_documents_complex_full: print(json.dumps(client.get_documents_complex_full(json.loads(args.get_documents_complex_full)), indent=4)) elif args.delete_feature: print(json.dumps(client.delete_feature(int(args.delete_feature[0]), args.delete_feature[1]), indent=4)) elif args.delete_features: print(json.dumps(client.delete_features(int(args.delete_features[0]), json.loads(args.delete_features[1])), indent=4)) elif args.delete_annotation_set: print(json.dumps(client.delete_annotation_set(int(args.delete_annotation_set[0]), args.delete_annotation_set[1]), indent=4)) elif args.delete_annotation: print(json.dumps(client.delete_annotation(int(args.delete_annotation[0]), args.delete_annotation[1], args.delete_annotation[2]), indent=4)) elif args.delete_document: print(json.dumps(client.delete_document(int(args.delete_document)), indent=4)) elif args.post_document: with sys.stdin if args.post_document == '-' else open(args.post_document) as fp: print(json.dumps(client.post_document(json.loads(fp.read())), indent=4)) elif args.post_document_websays: with sys.stdin if args.post_document_websays == '-' else open(args.post_document_websays) as fp: print(' '.join([str(x) for x in client.post_document_websays(fp.read())])) elif args.post_document_plaintext: with sys.stdin if args.post_document_plaintext == '-' else open(args.post_document_plaintext) as fp: print(json.dumps(client.post_document_plaintext(fp.read()), indent=4)) elif args.test_doc: print(json.dumps(client.test_doc())) elif args.test_doc_str: print(json.dumps(client.test_doc_str(args.test_doc_str))) elif args.test_doc_post: print(json.dumps(client.test_doc_post(args.test_doc_post))) elif args.put_features: print(json.dumps(client.put_features(int(args.put_features[0]), json.loads(args.put_features[1])), indent=4)) elif args.put_annotations: print(json.dumps(client.put_annotations(int(args.put_annotations[0]), json.loads(args.put_annotations[1])), indent=4)) elif args.put_annotation_set: print(json.dumps(client.put_annotation_set(int(args.put_annotation_set[0]), args.put_annotation_set[1], json.loads(args.put_annotation_set[2])), indent=4)) else: parser.print_help()