#Felix Zhang, Period 5
from heapq import *

def main():
	"""Translate the built-in sample sentence.

	The interactive prompt is kept below, commented out, exactly as the
	author left it; only the hard-coded sentence is used.
	"""
#	input = raw_input("Enter German word, phrase, or sentence: ");
	sentence = "den Mann machen die kleinen Kinder"
	dictionarymeth(sentence)

def dictionarymeth(input):
	"""Dictionary-based translation: load 'dict.txt' and translate input.

	input -- the German text handed on to translate()

	A fresh dict is filled by readdict() and passed to translate(); nothing
	is returned.
	"""
	entries = {}
	readdict(entries, 'dict.txt')
	translate(entries, input)

def readdict(hashtable, filename): #Reads dictionary and stores entries in hashtable
	"""Load a word list from filename into hashtable.

	Every non-blank line is split on single spaces: the first token is the
	German key, the remaining tokens form the list of English words that is
	registered under that key with register().

	hashtable -- dict, filled in place
	filename  -- path of the dictionary file
	"""
	with open(filename) as source: #the file is closed even if reading fails
		lines = source.read().split('\n')
	for edge in lines:
		if edge == '':
			#Blank lines carry no entry. This also covers the empty piece after a
			#trailing newline, so the last real line is kept whether or not the
			#file ends with a newline.
			continue
		lingset = edge.split(' ')
		german = lingset[0]
		english = lingset[1:]
		register(hashtable, german, english)

def register(hashtable, key, value): #Add value to a hashtable key
	"""Append value to the list stored under key, creating the list on first use."""
	hashtable.setdefault(key, []).append(value)

def translate(hashtable, input): #Runs all the components on the input
	"""Run every stage of the pipeline on one sentence and print each result.

	hashtable -- dictionary passed on to properties() and lookup()
	input     -- sentence as a single string; it is split on single spaces

	Nothing is returned; all results are written to standard output.
	"""
	words = input.split(' ')
	translated = [] #NOTE(review): never read again in this function
	tagged = pospeech(words)
	print "Part of speech tags: ",tagged
	attribs = properties(tagged,hashtable)
	print "Morphological analysis: ",attribs
	agree = nvagree(attribs)
	
	print "Disambiguated after noun-verb agreement: ",agree
	roots = lemmatize(agree)
	print "Lemmatized: ",roots
	translatedroots = lookup(roots, words, hashtable)
	
	print "Root translated: ", translatedroots
	print "NP Chunked English: ", 
	chunkedtranslation = NPchunk(translatedroots, tagged, agree)
	print chunkedtranslation

	#inflect() prints its own result; the returned list is not used afterwards
	print "Inflected (only works before chunking): ",
	conjugated = inflect(words, translatedroots, agree)
	print ""
	print "Assigned an element type: "
	elemented = elementassign(chunkedtranslation)
	print elemented
	print "Assigned priority: "
	priority = priorityassign(elemented)
	print priority
	print "Rearranged to English structure: "
	print sorted(priority)#sorts the list by priority number - rearranges the sentence to English sentence structure
def pospeech(words): #Tags part of speech based on sentence position and capitalization
	"""Tag every word with a coarse part of speech.

	words -- list of word strings in sentence order

	Returns a list of [word, tag] lists, one per input word and in the same
	order. The tag is "nou", "pn", "ver", "art", "adj", or "" when no rule
	applies.

	Rules, tried in this order for each word:
	  * capitalised (ASCII A-Z) and not the first word -> noun
	  * listed pronoun                                   -> pronoun
	  * directly after a noun or pronoun                 -> verb
	  * listed article                                   -> article
	A second pass turns still-untagged words into adjectives when they
	directly precede a noun or directly follow a verb.
	"""
	caps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	#Pronouns and articles are few enough to be hardcoded. They are split into
	#whole words so that fragments such as "un" or "e" do not match by accident.
	pronouns = "ich du er sie es ihn ihm ihr uns euch wir ihr ihnen".split(' ')
	articles = "der die das den dem".split(' ')
	#TODO: prepositions ("durch fuer gegen ohne um bei mit nach seit von zu") are not tagged yet
	wordtags = []
	#enumerate() supplies the real position; looking the word up again would
	#return the first occurrence and mis-tag repeated words.
	for position, x in enumerate(words):
		postag = [x, ""]
		if x == "": #empty token (e.g. from a double space): leave untagged
			wordtags.append(postag)
			continue
		lowered = x.lower()
		if position != 0 and x[0] in caps: #If a word in German is capitalized, it is a noun
			postag[1] = "nou"
		elif lowered in pronouns:
			postag[1] = "pn"
		elif position > 0 and wordtags[position - 1][1] in ("nou", "pn"):
			postag[1] = "ver" #A verb (at least in this program) always follows a noun or pronoun - except past tense, which sometimes works out
		elif lowered in articles:
			postag[1] = "art"
		wordtags.append(postag)
	for place, x in enumerate(wordtags):
		if x[1] == "" and x[0] != "":
			beforenoun = place < len(wordtags) - 1 and wordtags[place + 1][1] == "nou"
			afterverb = place > 0 and wordtags[place - 1][1] == "ver"
			if beforenoun or afterverb:
				x[1] = "adj"  #If a word follows a verb or precedes a noun and wasn't already tagged, it's an adjective
	return wordtags
	
def properties(words,d): #find linguistic properties like case and plurals and past tense
	"""Attach the possible grammatical readings to nouns, their adjective and verbs.

	words -- list of [word, tag] lists as produced by pospeech()
	d     -- dictionary; accepted for the caller's sake but not used here

	Returns a list of [[word, tag], readings] entries. Only nouns, the
	adjective attached to a noun, and verbs get an entry.
	  * noun: readings is a list of [case, gender-or-"pl"] pairs taken from
	    the article in front of it, narrowed by the adjective ending when an
	    adjective is present. The adjective entry is emitted just before the
	    noun and shares the very same readings list object.
	  * verb: readings is a list of [person, number] pairs derived from the
	    ending, followed by the tense string ("pres" or "pperfect") as the
	    last element.
	"""
	dem = ["dem", [["dat","mas"],["dat","neu"]]] #Stores possible pairs for each article - "dem" can be dative and masculine OR dative and neuter
	der = ["der", [["nom","mas"],["dat","fem"]]]
	die = ["die", [["nom","fem"],["akk","fem"],["nom","pl"], ["akk", "pl"]]]
	das = ["das", [["nom","neu"],["akk","neu"]]]
	den = ["den", [["akk","mas"],["dat", "pl"]]]

	sentence = []
	artset = [der, die, das, den, dem]

	#enumerate() gives the true position of each word; searching for the word
	#again would find the first occurrence and use the wrong context for
	#repeated nouns.
	for position, x in enumerate(words):
		POS = x[1]
		if POS == "nou":
			modifier = []
			article = []
			posartpairs = []
			posmodpairs = []
			probable1 = []

			if position != 0:
				cursor = position - 1
				prev = words[cursor]
				hasarticle = True
				#Walk backwards until an article or the first word is reached
				while prev[1] != "art" and cursor != 0:
					if prev[1] == "nou":
						#Another noun ends this noun phrase: no article, and adjectives
						#further left belong to that other noun.
						hasarticle = False
						break
					if prev[1] == "adj" and modifier == []:
						modifier = prev #Stores the closest preceding adjective as the modifier
					cursor = cursor - 1
					prev = words[cursor]
				if hasarticle:
					article = prev #may be a non-article first word; it then matches nothing in artset
			if article != []:
				for y in artset:
					if article[0].lower() == y[0]:
						for pair in y[1]:
							posartpairs.append(pair)
				if modifier != []:
					adjective = modifier[0]
					if adjective.endswith("en"): #Generates linguistic pairs based on adjective endings, similar to artset above
						posmodpairs = ["akk","mas"],["nom","pl"],["akk","pl"],["dat","pl"],["dat","mas"],["dat","fem"],["dat","neu"]
					elif adjective.endswith("e"):
						posmodpairs = ["akk","fem"],["akk","neu"], ["nom","mas"],["nom","fem"],["nom","neu"]
					#Keep only the readings allowed by both the article and the adjective
					for b in posmodpairs:
						if b in posartpairs:
							probable1.append(b)
				else:
					probable1 = posartpairs

			if modifier != []:
				sentence.append([modifier, probable1])
			sentence.append([x, probable1])

		elif POS == "ver":
			verb = x[0]
			pairs = []

			if verb[0:2] == "ge": #This prefix in German indicates a verb in the past perfect tense
				tense = "pperfect"
			else:
				tense = "pres"

			if verb.endswith("en"): #More pairs based on endings - if a verb ends with "en", it can either be first person or third person plural
				pairs = [["1","pl"],["3","pl"]]
			elif verb.endswith("st"):
				pairs = [["2","sing"]]
			if verb.endswith("t"): #also true for "st" endings, so those get these two pairs as well
				pairs.append(["3","sing"])
				pairs.append(["2","pl"])
			elif verb.endswith("e"):
				pairs = [["1","sing"]]
			pairs.append(tense) #the tense is always the last element

			sentence.append([x, pairs])
	return sentence

def _addroot(rootposs, root): #records a candidate root once
	if root not in rootposs:
		rootposs.append(root)

def lemmatize(words): #breaks word down into root form - verbs into infinitive, plurals into singulars
	"""Propose root (dictionary) forms for every analysed word.

	words -- list of [[word, tag], readings] entries as produced by
	         properties() / nvagree(); for verbs the last element of
	         readings is the tense string

	Returns a list of [word, candidate_roots] pairs in the same order.
	candidate_roots holds each distinct root once, in the order found.
	"""
	pluralendings = ["n", "en", "er", "s", "ern"] #possible German plural endings, tried in this order
	withroots = []
	for x in words:
		rootposs = []
		poss = x[1]
		word = x[0][0]
		pos = x[0][1]
		if pos == "ver":
			tense = poss[len(poss) - 1]
			if tense == "pperfect":
				#Past participle: drop the "ge" prefix and a final "t", then add the
				#infinitive ending unless the stem already ends in "en".
				stem = word
				if stem.startswith("ge"):
					stem = stem[2:]
				if stem.endswith("t"):
					stem = stem[:-1]
				if not stem.endswith("en"):
					stem = stem + "en"
				_addroot(rootposs, stem) #this root used to be computed but never stored
			else:
				for y in poss[0:len(poss)-1]:
				#something to implement: account for vowel changes, e->i, a -> a umlaut, e->ie
					end = ""

					if y == ["3", "pl"] or y == ["1", "pl"]:
						end = "en"
					elif y == ["2", "sing"]:
						end = "st"
					elif y == ["3", "sing"] or y == ["2", "pl"]:
						end = "t"
					elif y == ["1", "sing"]:
						end = "e"

					#NOTE(review): rfind() cuts at the last occurrence of the ending, not
					#necessarily at the end of the word; kept as in the original rule.
					_addroot(rootposs, word[0:word.rfind(end)] + "en")
		elif pos == "adj":
			for y in poss:
				if y[0] == "dat" or y == ["akk", "mas"] or y[1] == "pl":
					root = word[0:word.rfind("en")] #removes plural/accusative masculine adjective ending
				else:
					#only "der"-type articles are handled; endings after "ein" are not covered
					root = word[0:word.rfind("e")] #removes singular ending
				_addroot(rootposs, root)
		elif pos == "nou":
			for y in poss:
				if y[1] != "pl": #if the noun is not plural, there will be no suffixes, so the word can not be lemmatized any further
					_addroot(rootposs, word)
				else: #remove any of the possible German plural endings
					for ending in pluralendings:
						#endswith() only matches a real suffix, so short words no longer
						#match an ending they do not contain
						if word.endswith(ending):
							_addroot(rootposs, word[0:len(word) - len(ending)])
		withroots.append([word, rootposs])
	return withroots
		
def lookup(roots, words, hashtable):
	"""Translate every word through its candidate roots.

	roots     -- list of [word, candidate_roots] pairs from lemmatize()
	words     -- the original words of the sentence, in order
	hashtable -- dict mapping a German key to a list of English word lists

	Returns a list of [word, english] pairs. For a word listed in roots, the
	first English word of each candidate root found in hashtable is
	collected; the first one collected is used. When several different
	translations are found the first still wins, so the word is never
	silently lost. A message is printed when none is found. Words that are
	not in roots are looked up directly; unknown ones are skipped.
	"""
	definitions = []
	for word in words:
		inroots = False
		for y in roots:
			if y[0] != word:
				continue
			inroots = True
			possdef = []
			for z in y[1]:
				if z in hashtable: #direct dict membership instead of scanning the key list
					entries = hashtable[z]
					if entries and entries[0]: #skip keys stored without any English word
						trans = entries[0][0]
						if trans not in possdef:
							possdef.append(trans)
			if len(possdef) == 0: #if no definitions are found
				#parenthesised so the statement parses under Python 2 and Python 3 alike
				print("No definitions found for " + word + ".")
			else:
				definitions.append([word, possdef[0]])
		if not inroots and word in hashtable:
			entries = hashtable[word]
			if entries and entries[0]:
				definitions.append([word, entries[0][0]])
	return definitions
def findinattribs(attribs,word):
	"""Return the second element of the first entry whose first element equals word.

	None is returned when no entry matches.
	"""
	return next((entry[1] for entry in attribs if entry[0] == word), None)
def NPchunk(translated, tagged, attribs): #groups noun phrases into "chunks" - subject, direct object, verbs - used for English sentences (i.e. after the sentence has been translated, when chunking is actually useful)
	"""Group the translated words into noun-phrase chunks and single-word chunks.

	translated -- list of [german, english] pairs from lookup()
	tagged     -- list of [german, tag] pairs from pospeech()
	attribs    -- list of [[german, tag], readings] entries

	Returns a list of chunks in sentence order:
	  * a noun yields a list of the articles/adjectives directly in front of
	    it, each as [english, tag], followed by [english, "nou", readings];
	  * any word that is not an article, adjective or noun yields a flat
	    [english, tag, readings] chunk (readings is None when the word has
	    no entry in attribs).
	Articles and adjectives only appear inside the chunk of the noun that
	follows them.

	English words are found by German word rather than by position, so a
	word that lookup() could not translate no longer shifts every later
	word; such a word is passed through untranslated.
	"""
	english = {}
	for pair in translated:
		if pair[0] not in english: #the first translation of a word wins
			english[pair[0]] = pair[1]

	chunks = []
	for index, word in enumerate(tagged):
		pos = word[1]
		if pos == "nou":
			chunk = [[english.get(word[0], word[0]), pos, findinattribs(attribs, word)]]
			temp = index - 1
			#groups all articles and adjectives preceding the noun into the phrase;
			#the bound is tested first so the scan never wraps around to the end
			while temp >= 0 and tagged[temp][1] in ("adj", "art"):
				chunk.append([english.get(tagged[temp][0], tagged[temp][0]), tagged[temp][1]])
				temp = temp - 1
			chunk.reverse() #collected right-to-left, so restore sentence order
			chunks.append(chunk)
		elif pos != "art" and pos != "adj":
			chunks.append([english.get(word[0], word[0]), pos, findinattribs(attribs, word)])
	return chunks
def nvagree(attribs): #checks if a noun "agrees" in person and case with a verb to reduce ambiguities
	"""Use noun-verb agreement to pick the subject and narrow the verb reading.

	attribs -- list of [[word, tag], readings] entries from properties();
	           it is modified in place

	The last verb in attribs is used. Candidate subjects are the nearest
	nouns with a nominative reading before and after it (see
	findclosestsubjects()).
	  * One candidate: it becomes the subject with its first nominative
	    reading, and the verb is reduced to [[person, number], tense].
	  * Several candidates: the first one whose person/number is among the
	    verb's possible pairs becomes the subject, the verb is reduced the
	    same way, and nominative readings are removed from the other nouns
	    (eliminateother()).
	Number is "pl" for a plural reading and "sing" otherwise; nouns are third
	person.

	Returns attribs. It is returned unchanged when there is no verb, no
	candidate, or no candidate that agrees with the verb.
	"""
	verb = []
	verbindex = 0
	tense = ""
	for position, entry in enumerate(attribs):
		if entry[0][1] == "ver":
			verb = entry
			verbindex = position
			tense = verb[1][len(verb[1]) - 1] #the tense is stored last
	if verb == []:
		return attribs #nothing to agree with

	closest = findclosestsubjects(attribs, verbindex) #takes the two closest nouns to the verb - the one before and the one after

	if len(closest) == 1:
		subject = closest[0]
		gender = subject[1][0][1]
		if gender == "pl":
			number = "pl"
		else:
			number = "sing" #a gender ("mas", "fem", "neu") means a singular noun
		person = ""
		if subject[0][1] == "nou":
			person = "3"
		attribs[findinlist(attribs, subject[0])][1] = [["nom", gender]]
		attribs[verbindex][1] = [[person, number], tense]
		return attribs

	verbpairs = verb[1][0:len(verb[1]) - 1] #every reading except the trailing tense
	for subject in closest:
		person = ""
		if subject[0][1] == "nou":
			person = "3"
		for reading in subject[1]:
			if reading[1] == "pl":
				number = "pl"
			else:
				number = "sing"
			if [person, number] in verbpairs:
				subjectindex = findinlist(attribs, subject[0])
				attribs[subjectindex][1] = [["nom", reading[1]]]
				attribs[verbindex][1] = [[person, number], tense]
				#pass the updated entry itself so it is recognised as the subject
				return eliminateother(attribs, attribs[subjectindex], closest)
	return attribs
def eliminateother(attribs, sub, closest): #after determining the subject, the program removes the possibility that any other words in the sentence can be the subject
	"""Remove every nominative reading from the nouns that are not the subject.

	attribs -- list of [[word, tag], readings] entries, modified in place
	sub     -- the subject entry; a noun is treated as the subject when its
	           [word, tag] pair equals sub[0]
	closest -- accepted for the caller's sake but not used

	Returns attribs.
	"""
	for x in attribs:
		if x[0][1] == "nou" and x[0] != sub[0]:
			#Rebuild the readings in place: removing items while looping over the
			#same list skips the element after each removal. Slice assignment keeps
			#the list object, so entries sharing it see the change too.
			x[1][:] = [y for y in x[1] if y[0] != "nom"]
	return attribs
def findinlist(list, item):
	"""Return the position of the first entry whose first element equals item.

	None is returned when no entry matches.
	"""
	for position, entry in enumerate(list):
		if entry[0] == item:
			return position
	return None
def _nominativereadings(entry): #nominative readings of a noun entry, [] for anything else
	if entry[0][1] != "nou":
		return []
	return [reading for reading in entry[1] if reading[0] == "nom"]

def findclosestsubjects(attribs, verbindex):
	"""Collect the nearest possible subjects on either side of the verb.

	attribs   -- list of [[word, tag], readings] entries
	verbindex -- position of the verb in attribs

	Returns a list with at most two [[word, tag], nominative_readings]
	entries: first the nearest noun before the verb that has a nominative
	reading, then the nearest such noun after it. A side without such a noun
	contributes nothing.
	"""
	possnouns = []

	#scan leftwards from the entry just before the verb
	for position in range(verbindex - 1, -1, -1):
		readings = _nominativereadings(attribs[position])
		if len(readings) > 0:
			possnouns.append([attribs[position][0], readings])
			break

	#scan rightwards; range() stops at the last entry, so the scan can no
	#longer index past the end when no noun follows the verb
	for position in range(verbindex + 1, len(attribs)):
		readings = _nominativereadings(attribs[position])
		if len(readings) > 0:
			possnouns.append([attribs[position][0], readings])
			break

	return possnouns
def elementassign(chunked):#assigns one of five element types to a noun phrase - subject, direct object, indirect object, auxiliary verb, or main verb.
	"""Append a sentence-element label to every chunk.

	chunked -- list of chunks as built by NPchunk(): a noun phrase is a list
	           of word lists ending in [english, "nou", readings]; any other
	           chunk is a flat [english, tag, readings] list

	Each chunk is extended in place with one label and the chunks are
	returned in a new list:
	  "mverb"   verb whose tense (last element of its readings) is "pres"
	  "auxverb" verb with any other tense
	  "dobj" / "iobj" / "sub"  noun phrase whose first reading is
	            accusative / dative / nominative
	  ""        anything that cannot be classified
	"""
	caselabels = {"akk": "dobj", "dat": "iobj", "nom": "sub"}
	newchunks = []
	for x in chunked:
		tempelement = "" #reset per chunk so a label never leaks from the previous chunk
		if len(x) > 0 and isinstance(x[0], list):
			#Noun phrase: only its last word, the noun, carries readings. Checking
			#the shape first also handles phrases that consist of the noun alone.
			head = x[len(x)-1]
			if len(head) > 2 and head[1] == "nou" and head[2]:
				#the element assigner bases assignments only on one of the possibilities, should an ambiguity occur.
				tempelement = caselabels.get(head[2][0][0], "")
		elif len(x) > 2 and x[1] == "ver" and x[2]:
			if x[2][len(x[2])-1] == "pres":#all verbs in the present tense will be main verbs
				tempelement = "mverb"
			else:
				tempelement = "auxverb"#auxiliary verbs don't even exist in my program yet
		x.append(tempelement)
		newchunks.append(x)

	return newchunks
def priorityassign(elemented):
	"""Prefix every chunk with the priority configured for its element label.

	elemented -- list of chunks whose last item is the element label, as
	             returned by elementassign(); modified in place

	The file "priorities.txt" is read from the current directory. Each line
	holds a label and its priority separated by a single space; lines with
	fewer than two fields (such as blank lines) are ignored. When a label is
	listed more than once the last line wins. A chunk whose label is not
	listed gets the empty string.

	Returns elemented.
	"""
	assignments = []
	with open("priorities.txt") as source: #closed again as soon as it has been read
		for line in source.read().split("\n"):
			fields = line.split(" ")
			if len(fields) > 1:
				assignments.append(fields)
	for x in elemented:
		element = x[len(x)-1]
		priority = "" #reset per chunk so an unknown label does not inherit the previous priority
		for y in assignments:
			if y[0] == element:
				priority = y[1]
		x.insert(0, priority) #takes the priority from the configuration file, and inserts it at the beginning of the chunk.

	return elemented
def inflect(words, translated, attribs):#pretty simplistic, adds -s or -es to plurals and singular verbs
	"""Add simple English endings to the translated words and print the result.

	words      -- not used in this function
	translated -- list of [german, english] pairs
	attribs    -- list of [[german, tag], readings] entries

	Returns (and prints, space separated on one line) a list with:
	  * [english, reading, inflected] for every reading of every matching
	    non-adjective entry in attribs;
	  * [english, english] for words that have no such entry.
	"""
	print ""
	conjugated = []
	for x in translated:
		needsconjug = False
		word = x[0]
		english = x[1]	
		for y in attribs:
			if y[0][0] == word and y[0][1] != "adj":
				needsconjug = True
				if y[0][1] == "ver":
					#the last element of the verb readings is skipped
					#(properties() stores the tense string there)
					for z in y[1][0:len(y[1])-1]:
						ending = ''
						if z == ["3","sing"]:
							#third person singular: "es" after s or x, otherwise "s"
							if english[len(english)-1] == "s" or english[len(english)-1] == "x":
								ending = 'es'
							else:
								ending = 's'
						conjugated.append([english,z,english+ending])

				elif y[0][1] == "nou":
					for z in y[1]:
						ending = ''
						if z[1] == "pl": #plural readings always get a plain "s"
							ending = 's'
						conjugated.append([english,z,english+ending])
		if needsconjug is False:
			conjugated.append([english,english])
	#the trailing comma keeps all entries on one output line
	for x in conjugated:
		print x,
	return conjugated
	
#Unguarded call: the demo translation runs whenever this file is executed or imported.
main()
#Things to do:
#lemmatize - String verbs, vowel changes, imperfect
#Grammar