
import string, csv, sys, time, platform
import hashlib
import json
import io
import re

import KCALLMUtilities as KCATraces


def computeKey(df, idx):
    """Return the MD5 hex digest used as key for row ``idx`` of ``df``.

    The GTIN alone is hashed when it is not empty; otherwise the
    concatenation of GTIN, name, comment and trademark is hashed.
    ``df`` only has to support ``df[column][idx]`` lookups.
    """
    source = df["V_GTIN"][idx]
    if source == "":
        # No barcode available: fall back on the descriptive columns.
        source = source + df["V_Name"][idx] + df["V_Comment"][idx] + df["V_Trademark"][idx]

    return hashlib.md5(source.encode('utf-8')).hexdigest()


def removeExoticMarkers(iStr):
    """Strip markup residue from a raw label.

    ``&quot;`` becomes an apostrophe; line breaks, the registered sign and
    backslashes are removed. ``None`` is passed through unchanged.
    """
    if iStr is None:
        return None

    cleaned = iStr.replace("&quot;", "'")
    for pattern in (r"[\n\r]*", r"[®]*"):
        cleaned = re.sub(pattern, "", cleaned)

    return cleaned.replace("\\", "")


def setRowID(dataRow, codeOrigin):
    """Compute the row identifier and store it in ``dataRow['V_ID']``.

    The identifier is ``codeOrigin + "#" + digest`` where ``digest`` is the
    MD5 hex digest of the concatenated normalized name, normalized comment,
    normalized trademark and GTIN. When the normalized name is empty, or the
    row cannot be read/hashed, the placeholder ``"XXXX"`` is used instead of
    the digest and an error is traced.

    Args:
        dataRow: mutable mapping providing 'V_NormName', 'V_NormComment',
            'V_NormTrademark' and 'V_GTIN'; it is updated in place.
        codeOrigin: string prefix placed before the ``#`` separator.

    Returns:
        None. The result is written to ``dataRow['V_ID']``.
    """
    ext = "XXXX"
    try:
        name = dataRow['V_NormName']
        comment = dataRow['V_NormComment']
        trademark = dataRow['V_NormTrademark']
        gtin = dataRow['V_GTIN']

        if name != "":
            # Only overwrite the placeholder once the digest is available, so
            # a failure while hashing never leaks the raw text into the ID.
            ext = hashlib.md5((name + comment + trademark + gtin).encode()).hexdigest()
        else:
            KCATraces.TRACE_ERR("Empty name, no possible ID")

    except Exception:
        # Best effort: a malformed row still gets the placeholder ID. Unlike a
        # bare ``except``, KeyboardInterrupt/SystemExit are not swallowed.
        KCATraces.TRACE_ERR("Wrong norm name/comment")
        print('---------------------------------------------------------------------------')
        print('ERROR in:')
        print(dataRow)
        print('---------------------------------------------------------------------------')

    dataRow['V_ID'] = codeOrigin + "#" + ext

    return

    
def formatName(iStr):
    """Format a product name for display.

    The name is cleaned with removeExoticMarkers, each space-separated word
    is title-cased (with a few acronyms and French link words special-cased)
    and the result is finally passed through setCamelCaseFormat.
    Raises AttributeError when ``iStr`` is None, because removeExoticMarkers
    returns None in that case.
    """
    cleaned = removeExoticMarkers(iStr)

    special_forms = {"Bio": "BIO", "Uht": "UHT", "C'Est": "C'est", "Abc": "ABC"}
    link_words = ("De", "À", "D'", "Au", "Aux", "Le", "Les",
                  "E", "Pour", "L'", "La", "En",
                  "Du", "Al", "Et", "Avec")

    words = []
    for raw in cleaned.split(" "):
        word = raw.title()
        if word in special_forms:
            word = special_forms[word]
        elif word in link_words:
            word = word.lower()
        words.append(word)
    formatted = " ".join(words)

    # Drop one remaining trailing blank, if any.
    if formatted.endswith(" "):
        formatted = formatted[:-1]

    return setCamelCaseFormat(formatted)


def formatAllergens(sta):
    """Convert a comma-separated allergen list to known ``en:`` tags.

    Entries already matching a known tag are kept as-is. Other entries are
    matched case-insensitively against a small French vocabulary and replaced
    by the corresponding tag; anything else is dropped. An unknown entry that
    contains ``en:`` or ``fr:`` is additionally reported through
    KCATraces.TRACE_ERR. Entries are not stripped of surrounding blanks.
    """
    if sta == "":
        return ""

    known_tags = ["en:none", "en:milk", "en:soybeans", "en:sulphur-dioxide-and-sulphites", "en:nuts", "en:peanuts",
                  "en:lupin", "en:gluten", "en:eggs", "en:mustard", "en:celery", "en:molluscs", "en:crustaceans",
                  "en:fish", "en:sesame-seeds", "en:oat"]
    french_to_tag = {
        "lait": "en:milk",
        "soja": "en:soybeans",
        "fruits à coques": "en:nuts",
        "oeuf": "en:eggs",
        "avoine": "en:oat",
        "arachides": "en:peanuts",
        "sulfites": "en:sulphur-dioxide-and-sulphites",
        "moutarde": "en:mustard",
        "céleri": "en:celery",
        "poisson": "en:fish",
        "crustacés": "en:crustaceans",
        "sésame": "en:sesame-seeds",
    }

    kept = []
    for entry in sta.split(","):
        if entry in known_tags:
            kept.append(entry)
            continue

        # Particular case: looks like a tag but is not a known one
        if entry.find("en:") >= 0 or entry.find("fr:") >= 0:
            KCATraces.TRACE_ERR("Wrong allergen: " + entry)

        # Other case: French wording
        translated = french_to_tag.get(entry.lower(), "")
        if translated != "":
            kept.append(translated)

    return ",".join(kept)


def normalizeConcatenation(name, comment, brand):
    """Build a search key from the normalized name, comment and brand.

    The normalized name always comes first; the normalized comment and brand
    are appended (separated by a blank) when the raw value is not the empty
    string. Triple blanks are collapsed after each append.
    """
    key = normalizeName(name)
    for raw, normalizer in ((comment, normalizeComment), (brand, normalizeBrand)):
        if raw != '':
            key = (key + ' ' + normalizer(raw)).replace("   ", " ")

    return key

                            
def normalizeComment(n):
    """Return the normalizeStd form of a comment, or '' when it is None."""
    return '' if n is None else normalizeStd(n)


def normalizeName(n):
    """Normalize a product name for comparison.

    Applies normalizeStd, removes every parenthesised fragment, then
    replaces pairs of whitespace characters by a single blank.
    Returns '' when ``n`` is None.
    """
    if n is None:
        return ''

    without_parentheses = re.sub(r"\(.*?\)", "", normalizeStd(n))

    return re.sub(r"\s\s", " ", without_parentheses)


def normalizeBrand(n):
    """Normalize a brand name for comparison.

    The brand is cleaned with removeExoticMarkers, normalized with
    normalizeStd (plural removal disabled) and the " sas" / " s.a."
    fragments are removed. Returns '' when ``n`` is None.
    """
    if n is None:
        return ''

    brand = normalizeStd(removeExoticMarkers(n), False)
    for company_form in (" sas", " s.a."):
        brand = brand.replace(company_form, "")

    return brand


def normalizeBrandForDisplay(n):
    """Return the brand as capitalized words glued together without blanks.

    The brand is normalized with normalizeStd first. The raw input and the
    result are also printed to standard output (after an empty line).
    """
    # Traces: empty line, raw brand, then the computed value below.
    print()
    print(n)

    pieces = normalizeStd(n).split(" ")
    display = "".join(piece.capitalize() for piece in pieces if piece not in ("", " "))
    print(display)

    return display


def normalizeBrandForFileNaming(s):
    """Turn a brand into a token usable in a file name.

    The brand is normalized with normalizeBrand, lowercased, stripped of
    company-form and generic words (ordered regex list), trimmed, formatted
    with setCamelCaseFormat and blanks are replaced by underscores.
    Returns '' when ``s`` is None.
    """
    if s is None:
        return ''

    # Regex list (order is important)
    # NOTE(review): the dots in the first pattern are unescaped and there is
    # no word boundary after the alternatives, so e.g. " sa" also matches the
    # start of a longer word. Left as-is because generated file names may
    # already depend on it -- confirm before tightening.
    removable = (
        r"\s(sas|sa|plc|p.l.c.|s.a.|s.a.s.)",
        r"\s(la\s|)(societe)",
        r"\s(des\sproduits)",
        r"\s(cooperative)",
        r"\s(entreprise)",
        r"\s(enseigne)",
    )

    name = normalizeBrand(s).lower()
    for pattern in removable:
        name = re.sub(pattern, "", name)

    name = setCamelCaseFormat(name.strip())

    return name.replace(" ", "_")


def normalizeCategories(n):
    """Lowercase a category, use dashes for blanks and map é/è to e."""
    slug = n.lower().replace(" ", "-")
    for accented in ("é", "è"):
        slug = slug.replace(accented, "e")
    return slug


def normalizeQuantityProposition(exp):
    """Normalize a free-text quantity expression.

    Each blank-separated word of the lowercased expression goes through
    normalizeQuantityWord; words that become empty are dropped. The rebuilt
    sentence is then processed as a whole by reformatQuantity.
    """
    # Process each word
    words = [normalizeQuantityWord(word) for word in exp.lower().split(" ")]

    # Rebuild expression
    sentence = " ".join(word for word in words if word != "")
    sentence = sentence.replace("  ", " ")

    # Reprocess the full sentence
    return reformatQuantity(sentence)


def normalizeQuantityWord(w):
    """Normalize one word of a quantity expression.

    Accents and elisions are removed, a final 's' is dropped on words longer
    than three characters (a few words excepted), filler words become '',
    "eaux" becomes "eau" and some feminine forms are mapped to the masculine.
    """
    if w == '':
        return ''

    # Partial replace (order is important)
    partial = (("-", ' '), ("d'une ", ' '), ("d'un ", ' '), ("l'", ''), ('ée', 'e'),
               ('é', 'e'), ('è', 'e'), ('ê', 'e'), ('ô', 'o'), ('û', 'u'),
               ('î', 'i'), ('â', 'a'), ('à', 'a'))
    word = w
    for old, new in partial:
        word = word.replace(old, new)

    # Remove 's'
    keeps_final_s = ("trois", "tiers", "quarts", "tres", "plus", "gros")
    if len(word) > 3 and word[-1] == "s" and word not in keeps_final_s:
        word = word[:-1]
    if word in ('d\'unite', 'unite', 'entier', 'entiere', 'le', 'la', 'au'):
        word = ""

    # Morceau case
    word = word.replace("eaux", "eau")

    # Replace full word (unknown words are returned unchanged)
    full_word = {'une': 'un', "demie": 'demi', "grosse": 'gros', "normale": 'normal',
                 "garnie": 'garni', "petite": 'petit', "de": '', "du": '', "a": '', "en": ''}

    return full_word.get(word, word)


def reformatQuantity(e):
    """Rewrite French number words and fractions of a quantity as digits.

    The text is lowercased and trimmed, protected words are temporarily
    replaced by placeholders, an ordered list of substring substitutions is
    applied, and the protected words are finally restored.
    """
    # Basic
    txt = e.lower().strip()

    # Particular case to ignore: swap each protected word for a placeholder
    protected = ["quartier"]
    marker = "ToBeIgnored#"
    placeholders = [(word, marker + str(rank)) for rank, word in enumerate(protected, 1)]
    for word, token in placeholders:
        txt = txt.replace(word, token)

    # Partial replace (order is important)
    substitutions = (
        ('d\'un ', ' '), ('d\'', ''), (' et ', ' '),
        ('un huitieme', '1/8'), ('trois huitieme', '3/8'), ('cinq huitieme', '5/8'), ('sept huitieme', '7/8'),
        ('un quart', '1/4'), ('trois quarts', '3/4'), ('quart', '1/4'),
        ('un tiers', '1/3'), ('deux tiers', '2/3'),
        ('un demi', '1/2'), ('demi', '1/2'), ('0,5', '1/2'),
        ('un moitie', '1/2'), ('moitie', '1/2'),
        ('un', '1'), ('deux', '2'), ('trois', '3'), ('quatre', '4'), ('cinq', '5'),
        ('six', '6'), ('sept', '7'), ('huit', '8'), ('neuf', '9'), ('dix', '10'),
    )
    for old, new in substitutions:
        txt = txt.replace(old, new)

    # Remap particular case to ignore
    for word, token in placeholders:
        txt = txt.replace(token, word)

    return txt.strip().replace("  ", " ")



def normalizeURI(n):
    """Turn a label into a lowercase, dash-separated, accent-free slug.

    Blanks and commas become dashes, common accented letters are mapped to
    their ASCII base letter ("œ" becomes "oe") and runs of dashes are
    collapsed by two successive "--" -> "-" passes.

    Args:
        n: the label to convert (str).

    Returns:
        The slug as a str.
    """
    # The previous character classes were written like "[é|è|ë]"; inside a
    # class "|" is a literal, so pipe characters were turned into "e".
    # A translation table maps only the intended letters.
    accents = str.maketrans({
        "é": "e", "è": "e", "ë": "e", "ē": "e", "ê": "e",
        "ü": "u", "ù": "u", "û": "u",
        "à": "a", "â": "a", "ä": "a",
        "ô": "o", "ö": "o",
        "ç": "c",
        "î": "i", "ï": "i",
        "œ": "oe",
    })

    n = n.lower()
    n = n.replace(" ", "-").replace(",", "-")
    n = n.translate(accents)

    # Two passes, as before: "----" ends up as "-".
    n = n.replace("--", "-")
    n = n.replace("--", "-")

    return n


def normalizeStd(n, isPlurialToBeRemoved = True):
    """Return the standard normalized form of a label, used for matching.

    The text is lowercased, dashes become blanks, accents are removed, a few
    French link words and elisions are dropped, quotes and commas become
    blanks, and (optionally) plural endings are removed with removePlurial.
    A non-empty result is surrounded by single blanks so that whole words can
    be searched as " word ".

    Args:
        n: the label to normalize, or None.
        isPlurialToBeRemoved: when True (default) removePlurial is applied.

    Returns:
        The normalized str, or None when ``n`` is None.
    """
    if n is None:
        return None

    # The previous character classes were written like "[é|è|ë]"; inside a
    # class "|" is a literal, so pipe characters were turned into "e".
    # A translation table maps only the intended letters.
    accents = str.maketrans({
        "é": "e", "è": "e", "ë": "e", "ē": "e", "ê": "e",
        "ü": "u", "ù": "u", "û": "u",
        "à": "a", "â": "a", "ä": "a",
        "ô": "o", "ö": "o",
        "ç": "c",
        "î": "i", "ï": "i",
        "œ": "oe",
    })

    n = " " + n.lower() + " "
    n = n.replace("-", " ")
    n = n.replace("™", "")
    n = n.translate(accents)

    # Ordered substring replacements (link words first, then punctuation).
    replacements = (
        (" & ", " "), (" a ", " "), (" la ", " "), (" le ", " "), (" et ", " "),
        (" du ", " "), (" au ", " "), (" d'", " "), ("l'", ""), ("'", " "),
        ('"', " "), (",", " "),
    )
    for old, new in replacements:
        n = n.replace(old, new)

    n = n.replace("  ", " ")
    n = n.replace("  ", " ")

    # Comparison kept as "== True" so only True/1 enable the plural removal,
    # exactly as before.
    if isPlurialToBeRemoved == True:
        n = removePlurial(n)

    if n != "":
        n = " " + n + " "  # for search
    n = n.replace("  ", " ")

    return n


def removePlurial(n):
    """Drop the final 's' or 'x' of each blank-separated word.

    "deux" and "noix" are kept unchanged. Empty items and single-character
    words are discarded, so the result has no leading, trailing or doubled
    blanks.
    """
    invariant_words = ("deux", "noix")

    singulars = []
    for word in n.split(' '):
        if word in invariant_words:
            singulars.append(word)
        elif len(word) > 1:
            # Find if there is a plural ending
            singulars.append(word[:-1] if word[-1] in ('s', 'x') else word)

    return ' '.join(singulars)


def isValidTaxonomy(n):
    """Return 1 when ``n`` holds at least two comma-separated items, else 0.

    Empty values and values containing "de:" are always rejected.
    """
    # Empty value and particular cases
    if len(n) == 0 or n.find("de:") != -1:
        return 0

    return 1 if len(n.split(",")) > 1 else 0


def isValidName(n):
    """Return 1 when a product name looks usable as a generic name, else 0.

    The name is normalized with normalizeName and rejected when it contains
    a volume marker, one of a list of unwanted words, a parenthesis, or when
    it is made of too many blank-separated items.

    Args:
        n: the raw product name.

    Returns:
        int: 1 for a valid name, 0 otherwise.
    """
    s = normalizeName(n).lower()

    # Strange word: volumes left in the name
    volume_markers = ("0ml", "0cl", "5ml", "5cl", "1 5", " 1l", " 2l", "1 5 l")

    # Strange word: packaging, foreign wording, special editions...
    # "limitee" is searched without accent: normalizeName has already mapped
    # "é" to "e", so the former "limitée" test could never match.
    unwanted_words = ("bottle", "koffeinfrei", " 201",
                      "limitee", "limited",
                      "original",
                      "direktsaft", "schorle", "manzana ades",
                      "moyen", "south africa", "tcheque",
                      " pck", " con", " sin ")

    rejected = volume_markers + unwanted_words + ("(", ")")
    if any(marker in s for marker in rejected):
        return 0

    # Remove too long string
    if len(s.replace("-", " ").split(" ")) > 8:
        return 0

    return 1


def formatTags(tags):
    """Keep the ``fr:`` tags of a comma-separated list, trimmed of blanks.

    Empty items are ignored. Any non-empty item that does not contain "fr:"
    is reported through KCATraces.TRACE_ERR and left out of the result.
    """
    # Checks
    kept = []
    for raw in tags.split(','):
        if raw == "":
            continue
        if raw.find("fr:") != -1:
            kept.append(raw.strip())
        else:
            KCATraces.TRACE_ERR("Inconsistency in the tags: '" + tags + "'")

    # Concatenate
    return ",".join(kept)


def formatValue(iStr):
    """Clean a textual numeric value.

    Commas become dots; lowercase ASCII letters, a "<" followed by a
    whitespace character, and blanks are removed.
    """
    substitutions = (
        (r",", "."),
        (r"[a-z]", ""),
        (r"<\s", ""),
        (r" ", ""),
    )
    value = iStr
    for pattern, replacement in substitutions:
        value = re.sub(pattern, replacement, value)

    return value

##def formatName(iStr):
##    
##    #Maj
##    n = iStr.replace("&quot;","'")
##    n = n.replace("®", "")
##    n = n.replace("\\", "")
##    spl = n.split(" ")
##    oStr = ""
##    for w in spl:
##        w = w.title()
##        if w == "Bio":
##            w = "BIO"
##        if w == "C'Est":
##            w = "C'est"
##        if w == "Abc":
##            w = "ABC"
##        if w == "De" or w == "À" or w == "D'" or w == "Au" or w == "Aux" or w == "Le" or w == "Les" \
##           or w == "E" or w == "Pour" or w == "L'" or w == "La" or w == "En" \
##           or w == "D'" or w == "Du" or w == "L'" or w == "Al" or w == "En" \
##           or w == "Et"  or w == "Avec":
##            w = w.lower()
##        oStr = oStr + w + " "
##    oStr = oStr[:-1]
##
##    if oStr[-1:] == " ":
##        oStr = oStr[:-1]
##    
##    return oStr


def formatCategories(catPath):
    """Format a category path as "/"-separated, language-prefixed items.

    Commas are treated as path separators, the text is lowercased and blanks
    become dashes. Items without an "fr:" or "en:" marker get an "en:" prefix.
    """
    normalized = catPath.replace(",", "/").lower().replace(" ", "-")

    prefixed = []
    for item in normalized.split("/"):
        has_language = item.find("fr:") != -1 or item.find("en:") != -1
        prefixed.append(item if has_language else "en:" + item)

    return "/".join(prefixed)
    

def normalizeServing(iStr):
    """Normalize a free-text serving size such as "un pot de 125 g".

    Args:
        iStr: the raw serving text (None is handled like an empty string).

    Returns:
        tuple: ``(err, oStr)``. ``err`` is 0 on success and ``oStr`` is the
        compact serving (e.g. "125g", "250ml", "1.5kg"). Error codes:
        1 = empty input, 2 = unexpected length of a gram value,
        3 = unexpected length of a millilitre value, 4 = centilitre value too
        long, 5 = no unit found, 10 = the numeric part is not a number (in
        that case ``oStr`` is reset to "").
    """
    if iStr is None or iStr == "":
        return (1, "")

    err = 0
    oStr = iStr.lower().replace(" ", "")
    for old, new in (("1000ml", "1l"), (",", "."), ("gr", "g"), ("litre", "l")):
        oStr = oStr.replace(old, new)

    has_unit = any(unit in oStr for unit in ("g", "ml", "cl", "l", "kg"))
    if len(oStr) > 5 or not has_unit:

        # Opportunistic cleaning! Longer phrases come first: "unverrede" was
        # previously unreachable because "unverre" had already been removed.
        for phrase in ("unpotde", "unverrede", "unverre", "1potde", "unpot"):
            oStr = oStr.replace(phrase, "")

        posi_g = oStr.find("g")
        posi_ml = oStr.find("ml")
        posi_cl = oStr.find("cl")
        if posi_g >= 0:
            oStr = oStr[:posi_g + 1]
            if len(oStr) != 4:
                err = 2
        elif posi_ml >= 0:
            oStr = oStr[:posi_ml + 2]
            if len(oStr) > 5 or len(oStr) <= 2:
                err = 3
        elif posi_cl >= 0:
            # Convert centilitres to millilitres. The former slicing kept the
            # "c" and produced values such as "25c0" that never validated.
            quantity = oStr[:posi_cl]
            try:
                oStr = "%gml" % (float(quantity) * 10)
            except ValueError:
                # Not a number: keep a textual form, rejected by the check below.
                oStr = quantity + "0ml"
            if len(oStr) > 6:
                err = 4
        else:
            err = 5

    # Check value
    if err == 0:
        number = oStr
        # "kg" must be stripped before "g", otherwise a stray "k" remains and
        # every kilogram value is rejected.
        for unit in ("kg", "cl", "ml", "g", "l"):
            number = number.replace(unit, "")
        try:
            float(number)
        except ValueError:
            oStr = ""
            err = 10

    return (err, oStr)


def processSportName(txt):
    """Return the normalizeStd form of a sport name."""
    return normalizeStd(txt)


def processGenericName(txt):
    """Return the generic name in lowercase."""
    return txt.lower()


def normalizeGenericName(txt):
    """Return the lowercase form of a generic name."""
    return txt.lower()


def processName(txt):
    """Clean a raw product name and return it formatted by setCamelCaseFormat.

    The name is lowercased, text between parentheses is removed, dashes
    become blanks and characters outside the allowed set are dropped. An
    ordered list of regular expressions then removes packaging, quantity and
    promotional fragments (one removal per expression). Names of exactly two
    characters are returned unchanged.
    """

    ## ^             #  Represents beginning of a line.
    ## [a-z]         #  Alphabetic character.
    ## .*            #  Any character 0 or more times.
    ## [a-z]         #  Alphabetic character.
    ## $             #  End of a line.
    ## i             #  Case-insensitive match.
    ## g             #  Global.
    ## m             #  Multiline

    # Particular cases (ex: 3X, beer)
    if len(txt) == 2:
        return txt

    # Word only in lower case
    ltxt = txt.lower()
    
    # Remove text between parenthesis
    ltxt = re.sub(r"\(.*?\)", "", ltxt)
    ltxt = re.sub(r"\s\s", " ", ltxt)
    ltxt = re.sub(r"-", " ", ltxt)
    ltxt = re.sub(r" % ", "% ", ltxt)
    ltxt = re.sub(r"œ", "oe", ltxt)
    # Keep only letters, digits, '%', whitespace, apostrophes and a few accented vowels
    ltxt = re.sub(r"[^a-zA-Z0-9%\s\'àâôîïéèêûü]","",ltxt)
    #ltxt = re.sub(r"^[^<>{}\"/|;:.,~!?@#$%^=&*\\]\\\\()\\[¿§«»ω⊙¤°℃℉€¥£¢¡®©0-9_+]*$", "", ltxt)

    ltxt = removeExoticMarkers(ltxt)
    ltxt = ltxt.replace(";",",")

    # Regex list (order is important)
    # NOTE(review): several patterns use "?+" (e.g. r"\s?+offre..."). This is
    # a possessive quantifier, accepted by the re module from Python 3.11 on;
    # older versions presumably raise re.error, which the loop below counts
    # and skips -- confirm against the target Python version.
    allRegEx = [\
        r"\s?canette", \
        r"\s?bouteille", \
        r"\s?sans\sprécision", \
        r"(tablettes?|)\s?[0-9]+(\s|)(x|X)(-|)(\s|$)", \
        r"\s?-[0-9][0-9]$", \
        r"\s?[0-9]+gx[0-9]+", \
        r"(\s?|\+\s?)[0-9]+\s?%$", \
        r"\s?+offre\s(decouverte|sp(é|e)ciale)", \
        r"\s?+maxi\sformat", \
        r"\s?\+[0-9]+\s?offerts?$", \
        r"\s?(dont|\+|[0-9]+)\s?+[0-9]+\s?offerts?$", \
        r"\s?prix\schoc", \
        r"\s?[0-9]+cc$", \
        r"\s?[0-9]+personnes?$", \
        r"\s?format\sfamilial", \
        r"\s?[0-9]+\s?(parts?|sachets?|capsules?)", \
        r"\s?[0-9](ème|er)\s(â|a)ge", \
        r"\s?(de|)\s?[0-9]+\s?(mois|ans?|)\s?à\s?[0-9]+\s?(mois|ans?)(\s|$)", \
        r"\s?d(ès|e)\s?[0-9]+(/[0-9]|)\s?(mois|ans?)(\s|$)", \
        r"\s?(étui|(paquet|colis\sde|sachet|sachet\ssouple)|vrac|boîte|sac|)(\sde\s|\s-\s|\s)([0-9]+?(x|.|)[0-9]+|[0-9]+)\s?(g|gr|grs|l|kg|ml|cl|dz)(\snet|(\s|$))", \
        r"\s?carton\sde\s[0-9]", \
        r"\s?[0-9]+\s?(oz|litre|l|gramme(s|)|gr|kilogramme(s|)|kg)$", \
        r"\s(bv|a|u|,|-|--)$", \
        r"\s[x|]\s?[0-9]+x$", \
        r"\s[x]\s?[0-9]", \
        r"\.$", \
        ];

    # Debug
    #if ltxt.find("GALLIA CALISMA") != -1:
    #    print("'" + ltxt + "'")
        
    # Test
    nbRegExError = 0  # number of patterns rejected by the re module (not reported)
    for i in range(1): # due to regex tab order
        for r in allRegEx:
            try:
                tst = re.search(r, ltxt)
                if tst != None:
                    oo = tst.group()

                    # Search position
                    # NOTE(review): str.find locates the FIRST occurrence of
                    # the matched text, which may sit before the position the
                    # regex actually matched (e.g. for "$"-anchored patterns).
                    posi = ltxt.find(oo)
                    lg = len(ltxt)
                    ltxt = ltxt[0:posi] + ltxt[posi+len(oo):lg]
            except re.error:
                nbRegExError += 1

            # Redundant whitespace
            ltxt = ltxt.rstrip()

    # Camel case
    ltxt = setCamelCaseFormat(ltxt)

    return ltxt



def processBrand(txt):
    """Clean a brand label and format it with setCamelCaseFormat.

    Exotic markers are removed and semicolons are replaced by commas first.
    """
    cleaned = removeExoticMarkers(txt).replace(";", ",")

    return setCamelCaseFormat(cleaned)


def processIngredients(s):
    """Lowercase an ingredient list and drop line breaks, ';' and '_'.

    Double blanks are then replaced once by a single blank.
    """
    # Strange in OFF... many useless '_'
    lowered = s.lower()
    stripped = re.sub(r"[\n\r;_]*", "", lowered)

    return stripped.replace("  ", " ")

def processComment(txt):
    """Clean a comment: remove exotic markers, lowercase, ';' becomes ','."""
    return removeExoticMarkers(txt).lower().replace(";", ",")


def isRegexCompliant(title, regexTab):
    """Return the index of the last pattern matching the start of ``title``.

    Args:
        title: the text to test.
        regexTab: iterable of regular expressions, each tried with re.match.

    Returns:
        int: index (0-based) of the last matching pattern, or -1 when no
        pattern matches.
    """
    regPosi = -1
    # The end position of group 1 used to be read here but was never used;
    # it only made matching patterns without a capture group raise IndexError.
    for idx, regex in enumerate(regexTab):
        if re.match(regex, title):
            regPosi = idx

    return regPosi


def simplifyComment(iTxt):
    """Remove uninformative wording from a comment and tidy its punctuation.

    Doubled double-quotes are protected during the cleaning and restored as
    a single double-quote at the end.
    """
    # Special case: protect doubled quotes
    cleaned = iTxt.replace('""', "%%")

    # Regex list (order is important)
    # (by default, all food is cooked: the "cru" wording is not removed)
    removable = (
        r"((\s|^)cuit(s|es|e|))",
        r"((\s|^)appertisé(s|es|e|))",
        r"(préemballé(s|es|e|))",
        r"((\s|^)rayon\sfrais)",
        r"((\s|^)sans\sprécision)",
        r"((\s|^)aliment\smoyen)",
        r"((\s|^)à\sréchauffer)",
        r"((\s|^)et autres)",
        r"((\s|^)prêt\sà\sconsommer)",
        r"((\s|^)etc)",
    )
    for pattern in removable:
        cleaned = re.sub(pattern, "", cleaned)

    # Finish: trim blanks and commas twice, then outer quotes
    cleaned = cleaned.strip().strip(',').strip().strip(',').strip('"')
    cleaned = cleaned.replace("%%", '"')

    # Collapse punctuation left behind, then reformat special cases
    for old, new in ((",,", ","), (",,", ","), (", ,", ","), (" ,", ", "), ("  ", " ")):
        cleaned = cleaned.replace(old, new)

    return cleaned



def setSimpleCamelCaseFormat(iStr):
    """Return the words of ``iStr`` capitalized and glued without separator.

    Dashes and apostrophes act as word separators, and a few lowercase
    accented letters are replaced by their base letter before splitting.
    """
    # Prepare format before split
    table = str.maketrans({"-": ' ', "'": ' ', 'é': 'e', 'è': 'e', 'ê': 'e', 'ô': 'o',
                           'û': 'u', 'î': 'i', 'â': 'a', 'à': 'a'})
    spaced = iStr.translate(table)

    # Split, convert and join
    return ''.join(word.lower().capitalize() for word in spaced.split(" ") if len(word) > 0)

def setCamelCaseFormat(iStr):
    """Capitalize each word of a label, with French typographic exceptions.

    Link words stay lowercase unless they come first, a few acronyms are
    uppercased, the letter following an apostrophe is capitalized and the
    elisions "D'" / "L'" are written in lowercase.
    """
    link_words = ['à', 'de', 'ou', 'des', 'et', 'pour', 'ou', 'au', 'aux', 'la', 'en', 'sans', 'du']
    acronyms = ['aop', 'bio', 'mg', 'uht']

    def glue(parts, separator):
        # The separator follows every non-empty part except the last one.
        last = len(parts) - 1
        return "".join(part + separator if (rank != last and part != '') else part
                       for rank, part in enumerate(parts))

    words = []
    for position, raw in enumerate(iStr.split(" ")):
        lowered = raw.lower()
        if len(raw) == 0:
            words.append('')
        elif position != 0 and lowered in link_words:
            words.append(lowered)
        elif lowered in acronyms:
            words.append(lowered.upper())
        else:
            words.append(lowered.capitalize())
    text = glue(words, " ")

    # Process the ': capitalize what follows each apostrophe
    segments = text.split("'")
    segments[1:] = [segment[:1].capitalize() + segment[1:] for segment in segments[1:]]
    text = glue(segments, "'")

    # Particular cases (ex: D'orange)
    return text.replace("D'", "d'").replace("L'", "l'")

def processServingSize(txt):
    """Extract the numeric quantity of a textual serving size.

    The text is lowercased and cleaned (footnote marks, unit spellings,
    leading "1 " rewritten as "une "), then tested against a list of accepted
    shapes such as "30 g", "(30 g)", "une tranche de 25g" or
    "pour 100 g de produit".

    Args:
        txt: the raw serving-size text.

    Returns:
        str: the first run of digits of the cleaned text when it has an
        accepted shape, otherwise "" (also for an empty or None input).
    """
    if not txt:
        return ""

    # Prepare string
    txt = txt.lower()
    if txt.startswith("1 "):
        txt = "une" + txt[1:]
    for marker in ("**", "?²?", "?³?", "¹", "²"):
        txt = txt.replace(marker, "")
    # endswith() is safe on an empty string; indexing the last character
    # raised IndexError when the cleanup above left nothing (e.g. "**").
    if txt.endswith((".", "*")):
        txt = txt[:-1]
    replacements = (
        ("grm", "g"), ("grs", "g"), ("é", "e"), ("è", "e"),
        (" grammes", "g"), (" gramme", "g"),
        ("pour 1 ", "pour une "), ("pour un ", "pour une "), ("pour 2 ", "pour deux "),
        (" g.", "g"), (" gr.", "g"),
    )
    for old, new in replacements:
        txt = txt.replace(old, new)
    txt = txt.strip()

    # Regex list. The second pattern used "\s?+", a possessive quantifier only
    # accepted from Python 3.11 on; a plain "\s?" matches the same texts here.
    accepted_shapes = (
        r"^[0-9]+$",
        r"^\([0-9]+\s?(g|ml|gr)\)$",
        r"^[0-9]+\s?(g|ml|gr)$",
        r"^(par|pour|):?\s?(une|deux|trois|)\s?(pot|brique|croque|stick|bouchee|boules?|tranches?|parts?|portions?|cookies?|saucisses?|knackis?|bouchees?|barquettes?|billes?|carres?|madeleines?|barres?|)(\sde\s|\s=\s|\s|)\(?[0-9]+(.|,|)[0-9]\s?(g|ml|gr)\)?$",
        r"^pour [0-9]+\s?(g|ml|gr) de (produit|produit egoutte)",
    )

    # Test
    for shape in accepted_shapes:
        if re.search(shape, txt) is not None:
            # Every accepted shape contains at least one digit.
            return re.search(r"[0-9]+", txt).group()

    return ""



def isIdenticalBrand(allNormBrand1, allNormBrand2, dbg = 0):
    """Return 1 when the two brand lists share at least one value, else 0.

    With a non-zero ``dbg`` both lists and every compared pair are printed.
    """
    verbose = dbg != 0
    if not verbose:
        shared = any(first == second for first in allNormBrand1 for second in allNormBrand2)
        return 1 if shared else 0

    print(allNormBrand1)
    print(allNormBrand2)
    for first in allNormBrand1:
        for second in allNormBrand2:
            print(first + "    " + second)
            if first == second:
                return 1
    return 0

