Skip to content
Snippets Groups Projects
Commit 3b165da1 authored by Slater-Victoroff's avatar Slater-Victoroff
Browse files

Replaced troublesome pyes integration with direct calls made to elasticsearch rest api

parent d1be90bb
No related merge requests found
{
"analyzer": {
"transcript_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["protected", "asciifolding", "custom_word_delimiter", "lowercase", "custom_stemmer", "shingle"],
"char_filter": ["custom_mapping"]
}
},
"filter" : {
"custom_word_delimiter":{
"type": "word_delimiter",
"preserve_original": "true"
},
"custom_stemmer": {
"type": "stemmer",
"name": "english"
},
"protected": {
"type": "keyword_marker",
"keywords_path": "protectedWords.txt"
}
},
"char_filter": {
"custom_mapping": {
"type": "mapping",
"mappings": ["\n=>-"]
}
}
}
\ No newline at end of file
import requests
import json
class ElasticDatabase(object):
    """Small client that drives an Elasticsearch server through its REST API.

    Every method builds a URL below ``self.url`` and issues the request with
    the ``requests`` library; no Elasticsearch client package is involved.
    """

    def __init__(self, url, index_settings_file, *args):
        """
        Will initialize elastic search object with any indices specified by args

        specifically the url should be something of the form `http://localhost:9200`
        importantly do not include a slash at the end of the url name.

        args should be a list of dictionaries (or JSON strings encoding such
        dictionaries), each one specifying a JSON mapping to be used for a
        specific type.

        Example Dictionary:
            {"index": "transcript", "type": "6-002x", "mapping":
                {
                    "properties" : {
                        "searchable_text": {
                            "type": "string",
                            "store": "yes",
                            "index": "analyzed"
                        }
                    }
                }
            }

        Eventually we will support different configuration files for different indices, but
        since this is only indexing transcripts right now it seems excessive

        Nothing is sent to the server here; call `parse_args` to create the
        indices and types described by args.
        """
        self.url = url
        self.args = args
        # Read the raw settings once and close the file straight away.
        with open(index_settings_file, 'rb') as settings_file:
            self.index_settings = settings_file.read()

    def parse_args(self):
        """Create the index and type mapping described by each entry of self.args.

        Each entry may be a dictionary or a JSON string. An entry that cannot
        be parsed as JSON aborts processing of the remaining entries; an entry
        that lacks one of the 'index', 'type' or 'mapping' keys is reported
        and skipped.
        """
        for raw_spec in self.args:
            if isinstance(raw_spec, dict):
                spec = raw_spec
            else:
                try:
                    spec = json.loads(raw_spec)
                except (TypeError, ValueError):
                    print("Badly formed JSON args, please check your mappings file")
                    break

            # Only the key lookups live inside the try, so that errors raised
            # by the HTTP calls below are not mistaken for missing keys.
            try:
                index = spec['index']
                type_ = spec['type']
                type_mapping = spec['mapping']
            except (KeyError, TypeError):
                found_keys = list(spec.keys()) if isinstance(spec, dict) else []
                print("Could not find needed keys. Keys found: ")
                print(found_keys)
                continue

            self.setup_index(index)
            self.setup_type(index, type_, type_mapping)

    def setup_type(self, index, type_, json_mapping):
        """
        json_mapping should be a dictionary starting at the properties level of a mapping.

        The type level will be added, so if you include it things will break. The purpose of this
        is to encourage loose coupling between types and mappings for better code

        Returns the response to the PUT request.
        """
        full_url = "/".join([self.url, index, type_, "_mapping"])
        json_put_body = {type_: json_mapping}
        # Serialize explicitly: handing a dict to `data=` would form-encode it
        # instead of sending a JSON document.
        return requests.put(full_url, data=json.dumps(json_put_body))

    def has_index(self, index):
        """Checks to see if a given index exists in the database.

        Returns True when the HEAD request answers 200 and False when it
        answers 404. Any other status code means something is wrong, so a
        RuntimeError carrying that code is raised.
        """
        full_url = "/".join([self.url, index])
        status = requests.head(full_url).status_code
        if status == 200:
            return True
        if status == 404:
            return False
        raise RuntimeError("Got an unexpected response code: " + str(status))

    def setup_index(self, index):
        """Creates a new elasticsearch index, returns the response it gets"""
        full_url = "/".join([self.url, index]) + "/"
        return requests.put(full_url, data=self.index_settings)

    def get_index_settings(self, index):
        """Returns the decoded JSON body of a GET on the index's `_settings` URL"""
        full_url = "/".join([self.url, index, "_settings"])
        return json.loads(requests.get(full_url).content)

    def get_type_mapping(self, index, type_):
        """Returns the decoded JSON body of a GET on the type's `_mapping` URL"""
        full_url = "/".join([self.url, index, type_, "_mapping"])
        return json.loads(requests.get(full_url).content)

    def index_data(self, index, type_, id_, json_data):
        """PUTs one document at index/type_/id_ and returns the response.

        json_data may be an already serialized JSON string, which is sent
        unchanged, or a dictionary/list, which is serialized first.
        """
        # str() lets callers pass numeric ids without breaking the join.
        full_url = "/".join([self.url, index, type_, str(id_)])
        if isinstance(json_data, (dict, list)):
            json_data = json.dumps(json_data)
        return requests.put(full_url, data=json_data)
......@@ -5,20 +5,7 @@
"index": "analyzed",
"store": "yes",
"type": "string",
"term_vector": "with_positions_offsets"
},
"phonetic_text": {
"boost": 1.0,
"index": "analyzed",
"store": "yes",
"type": "string",
"term_vector": "with_positions_offsets"
},
"uuid": {
"index": "not_analyzed",
"store": "yes",
"type": "string"
"term_vector": "with_positions_offsets",
"analyzer": "transcript_analyzer"
}
}
\ No newline at end of file
"gauss",
"stokes",
"navier",
"einstein",
"goddard",
"oppenheimer",
"bloch",
"hawkings",
"newton",
"bohr",
"darwin",
"planck",
"rontgen",
"tesla",
"franklin"
\ No newline at end of file
{
"settings": {
"index": {
"number_of_replicas": 2,
"number_of_shards": 3
}
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment