#!/usr/bin/env python # -*- coding: utf-8 -*- #------------------------------------------------------------------------------- # Copyright (c) 2010 Sourcefabric O.P.S. # # This file is part of the Airtime project. # http://airtime.sourcefabric.org/ # To report bugs, send an e-mail to contact@sourcefabric.org # # Airtime is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # Airtime is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Airtime; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # #------------------------------------------------------------------------------- #------------------------------------------------------------------------------- # This script converts an ICU localization file from Serbian Latin # to Serbian Cyrillic. #------------------------------------------------------------------------------- import sys, re, codecs usageString = 'Usage: serbianLatinToCyrillicConverter.py' \ ' inputfile outputfile' if len(sys.argv) >= 3: fileNameIn = sys.argv[1] fileNameOut = sys.argv[2] else: print usageString sys.exit(1) oldLines = codecs.open(fileNameIn, 'r', 'utf-8').readlines() newLines = [ ] def cyrillize(word): if re.match(r'#.*#\Z', word): return word compound = { u'lj' : u'љ', u'Lj' : u'Љ', u'nj' : u'њ', u'Nj' : u'Њ', u'dž' : u'џ', u'Dž' : u'Џ' } simple = dict(zip(u'abvgdđežzijklmnoprstćufhcčšw', u'абвгдђежзијклмнопрстћуфхцчшв')) simple.update(dict(zip(u'ABVGDĐEŽZIJKLMNOPRSTĆUFHCČŠW', u'АБВГДЂЕЖЗИЈКЛМНОПРСТЋУФХЦЧШВ'))) exceptions = { ur'\н' : ur'\n', u'Фаде ин' : u'Фејд ин', u'Фаде оут' : u'Фејд аут', u'фаде ин' : u'фејд ин', u'фаде оут' : u'фејд аут', u'есцапе' : u'ескејп', u'Плаy' : u'Плеј', u'Паусе' : u'Поуз', u'трацк' : u'трак', u'УРИ' : u'URI', u'РДС' : u'RDS', u'БПМ' : u'BPM', u'ИСРЦ' : u'ISRC' } for latin, cyrillic in compound.iteritems(): word = word.replace(latin, cyrillic) for latin, cyrillic in simple.iteritems(): word = word.replace(latin, cyrillic) for bad, good in exceptions.iteritems(): word = word.replace(bad, good) return word for line in oldLines: m = re.match(r'(.*)"(.*)"(.*)\n', line) if m: line = m.groups()[0] + '"' \ + cyrillize(m.groups()[1]) + '"' \ + m.groups()[2] + '\n' elif line == 'sr_CS:table\n': line = 'sr_CS_CYRILLIC:table\n' newLines += [line] codecs.open(fileNameOut, 'w', 'utf-8').writelines(newLines)