C’est marrant, j’avais fait un patch relativement propre à l’époque !
--- finddupes.glade 2010-10-24 17:05:33.000000000 +0200
+++ finddupes.glade 2010-12-17 18:43:53.000000000 +0100
@@ -264,6 +264,25 @@
</packing>
</child>
<child>
+ <object class="GtkCheckButton" id="phonex">
+ <property name="label" translatable="yes">Use phonex codes</property>
+ <property name="visible">True</property>
+ <property name="can_focus">True</property>
+ <property name="receives_default">False</property>
+ <property name="use_underline">True</property>
+ <property name="active">True</property>
+ <property name="draw_indicator">True</property>
+ </object>
+ <packing>
+ <property name="left_attach">1</property>
+ <property name="right_attach">2</property>
+ <property name="top_attach">5</property>
+ <property name="bottom_attach">6</property>
+ <property name="x_options">GTK_FILL</property>
+ <property name="y_options"></property>
+ </packing>
+ </child>
+ <child>
<object class="GtkComboBox" id="menu">
<property name="visible">True</property>
<property name="model">liststore1</property>
--- FindDupes.py 2010-12-06 10:02:02.000000000 +0100
+++ FindDupes.py 2010-12-17 18:51:58.000000000 +0100
@@ -41,6 +41,7 @@
from gui.utils import ProgressMeter
from gui.plug import tool
import soundex
+import phonex
from gen.display.name import displayer as name_displayer
from QuestionDialog import OkDialog
import ListModel
@@ -87,6 +88,7 @@
#-------------------------------------------------------------------------
class Merge(tool.Tool,ManagedWindow.ManagedWindow):
def __init__(self, dbstate, uistate, options_class, name, callback=None):
tool.Tool.__init__(self, dbstate, options_class, name)
@@ -102,12 +104,14 @@
self.removed = {}
self.update = callback
self.use_soundex = 1
+ self.use_phonex = 0
top = Glade()
# retrieve options
threshold = self.options.handler.options_dict['threshold']
use_soundex = self.options.handler.options_dict['soundex']
+ use_phonex = self.options.handler.options_dict['phonex']
my_menu = gtk.ListStore(str, object)
for val in sorted(_val2label):
@@ -117,6 +121,10 @@
self.soundex_obj.set_active(use_soundex)
self.soundex_obj.show()
+ self.phonex_obj = top.get_object("phonex")
+ self.phonex_obj.set_active(use_phonex)
+ self.phonex_obj.show()
+
self.menu = top.get_object("menu")
self.menu.set_model(my_menu)
self.menu.set_active(0)
@@ -158,6 +166,7 @@
def on_merge_ok_clicked(self, obj):
threshold = self.menu.get_model()[self.menu.get_active()][1]
self.use_soundex = int(self.soundex_obj.get_active())
+ self.use_phonex = int(self.phonex_obj.get_active())
try:
self.find_potentials(threshold)
except AttributeError, msg:
@@ -166,6 +175,7 @@
self.options.handler.options_dict['threshold'] = threshold
self.options.handler.options_dict['soundex'] = self.use_soundex
+ self.options.handler.options_dict['phonex'] = self.use_phonex
# Save options
self.options.handler.save_options()
@@ -252,6 +262,11 @@
return soundex.soundex(val)
except UnicodeEncodeError:
return val
+ elif self.use_phonex:
+ try:
+ return phonex.phonex_fr(val)
+ except UnicodeEncodeError:
+ return val
else:
return val
@@ -667,12 +682,16 @@
# Options specific for this report
self.options_dict = {
'soundex' : 1,
+ 'phonex' : 0,
'threshold' : 0.25,
}
self.options_help = {
'soundex' : ("=0/1","Whether to use SoundEx codes",
["Do not use SoundEx","Use SoundEx"],
True),
+ 'phonex' : ("=0/1","Whether to use PhonEx codes",
+ ["Do not use PhonEx","Use PhonEx"],
+ True),
'threshold' : ("=num","Threshold for tolerance",
"Floating point number")
}
#
# -*- coding: UTF-8 -*-
#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 1999 Frédéric Brouard
# Copyright (C) 1999 Florence Marquis
# Copyright (C) 2005 Christian Pennaforte
# Copyright (C) 2005 Florent Carlier
# Copyright (C) 2010 FR #4468
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""
Provide phonex calculation
"""
#-------------------------------------------------------------------------
#
# Standard python modules
#
#-------------------------------------------------------------------------
import string
import unicodedata
import re
#-------------------------------------------------------------------------
#
# constants
#
#-------------------------------------------------------------------------
# Characters deleted by the final str.translate() call in phonex_fr():
# the letters H and W, ASCII punctuation, and blanks (space, tab, form feed,
# vertical tab).
IGNORE = "HW~!@#$%^&*()_+=-`[]\|;:'/?.,<>\" \t\f\v"
# Letter-to-digit translation table used by phonex_fr().  The two literals
# are aligned position by position (A->0, B->1, C->2, ...); H and W are not
# listed here because they appear in IGNORE instead.
TABLE = string.maketrans('ABCDEFGIJKLMNOPQRSTUVXYZ',
'012301202245501262301202')
#-------------------------------------------------------------------------
#
# phonex - returns the phonex value for the specified string
#
#-------------------------------------------------------------------------
def _replace_each(text, pairs):
    "Apply every (old, new) substring replacement of pairs to text, in order."
    for old, new in pairs:
        text = text.replace(old, new)
    return text


def phonex_fr(strval):
    """
    Return the phonex value of a string argument for the French language.

    The input is upper-cased, stripped and reduced to ASCII, rewritten by
    the numbered phonetic rules below, and finally folded into a
    4-character code with the module-level TABLE / IGNORE translation.

    strval -- the name to encode (unicode or ASCII str), or None.

    Returns a 4-character string.  "Z000" is returned when strval is None
    or when nothing is left to encode after the transformations.
    """
    if strval is None:
        return "Z000"

    # Compose first so that each accented letter is a single code point and
    # can be matched by the replacements below.
    text = unicodedata.normalize('NFC', unicode(strval).upper().strip())

    # 1 replace y with i
    text = text.replace(u'Y', u'I')
    # 7 (done early) the accented E sounds become Y; this must happen
    # before the accents are stripped by the ASCII conversion below.
    for accented_e in (u'\xc9', u'\xc8', u'\xca'):  # E-acute, E-grave, E-circ.
        text = text.replace(accented_e, u'Y')

    r = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore')
    if not r:
        return "Z000"

    # 2 remove every h that is not preceded by c, s or p
    r = re.sub(r'([^PCS])H', r'\1', r)

    # 3 replace ph with f
    r = r.replace('PH', 'F')

    # 4 g followed by an, am, ain or aim becomes k
    r = re.sub(r'G(AI?[NM])', r'K\1', r)

    # 5 ain, ein, aim, eim followed by a, e, i, o or u become yn
    r = re.sub(r'[AE]I[NM]([AEIOU])', r'YN\1', r)

    # 6 three-letter groups (sounds 'o', 'oua', 'ein')
    r = _replace_each(r, (('EAU', 'O'), ('OUA', '2'), ('EIN', '4'),
                          ('AIN', '4'), ('EIM', '4'), ('AIM', '4')))

    # 7 remaining spellings of the 'e-acute' sound (accented letters were
    # already handled above)
    r = _replace_each(r, (('AI', 'Y'), ('EI', 'Y'), ('ER', 'YR'),
                          ('ESS', 'YS'), ('ET', 'YT'), ('EZ', 'YZ')))

    # 8 nasal groups ('an' and 'in' sounds), unless they are followed by
    # a, e, i, o, u or by a sound 1 to 4
    # NOTE(review): the pattern needs a following character, so a group at
    # the very end of the string is left untouched -- confirm this is wanted.
    for group, sound in (('AN', '1'), ('ON', '1'), ('AM', '1'),
                         ('EN', '1'), ('EM', '1'), ('IN', '4')):
        r = re.sub(group + r'([^AEIOU1234])', sound + r'\1', r)

    # 9 s becomes z when it stands between two of a, e, i, o, u, y or a
    # sound 1 to 4
    r = re.sub(r'([AEIOUY1234])S([AEIOUY1234])', r'\1Z\2', r)

    # 10 two-letter vowel groups
    r = _replace_each(r, (('OE', 'E'), ('EU', 'E'), ('AU', 'O'),
                          ('OI', '2'), ('OY', '2'), ('OU', '3')))

    # 11 consonant groups; SCH has to be tried before CH, otherwise CH
    # consumes it and the SCH rule can never match
    r = _replace_each(r, (('SCH', '5'), ('CH', '5'), ('SH', '5'),
                          ('SS', 'S'), ('SC', 'S')))

    # 12 c becomes s when followed by e or i
    r = re.sub(r'C([EI])', r'S\1', r)

    # 13 hard consonants; QU has to be tried before Q for the same reason
    # as SCH above
    r = _replace_each(r, (('C', 'K'), ('QU', 'K'), ('Q', 'K'), ('GU', 'K'),
                          ('GA', 'KA'), ('GO', 'KO'), ('GY', 'KY')))

    # 14 single-letter equivalences
    r = _replace_each(r, (('A', 'O'), ('D', 'T'), ('P', 'T'), ('J', 'G'),
                          ('B', 'F'), ('V', 'F'), ('M', 'N')))

    # 15 collapse runs of the same character
    r = re.sub(r'(?s)(.)\1+', r'\1', r)

    # 16 drop a final t or x
    r = re.sub(r'[TX]$', '', r)
    if not r:
        # Nothing left (e.g. the input was just "T"): r[0] below would fail.
        return "Z000"

    # The code starts with the first character of the transformed string,
    # taken before the letter-to-digit translation.
    code = r[0]
    r = r.translate(TABLE, IGNORE)
    if not r:
        return "Z000"

    prev = r[0]
    for character in r[1:]:
        # Skip repeated digits and the "0" class.
        if character != prev and character != "0":
            code = code + character
        prev = character

    # pad with zeros
    return (code + "0000")[:4]
#-------------------------------------------------------------------------
#
# compare - compares the phonex values of two strings
#
#-------------------------------------------------------------------------
def compare(str1, str2):
    """
    Tell whether two strings have the same French phonex code.

    Returns True when phonex_fr() yields identical codes for str1 and
    str2, False otherwise.
    """
    first_code = phonex_fr(str1)
    second_code = phonex_fr(str2)
    return first_code == second_code
Pas testé, mais il existe d’autres contributions (ou versions), par exemple des ajouts complémentaires (depuis 2010) :
# 17 Give each character of r its numeric code, i.e. its position in the
# symbol list below.
# NOTE(review): the original French comment says "starting from the last
# letter", but r is walked from its first character -- confirm the intent.
num = list('12345EFGHIKLNORSTUWXYZ')
l = [num.index(c) for c in r]
# 18 Turn the numeric codes into a base-22 fraction held in a float: the
# code at 1-based position i contributes code * 22 ** -i.
res = 0.
i = 1
for n in l:
    res += n * 22 ** -i
    i += 1
Ce truc en base 22 et en virgule flottante, c’est pour la frime ou cela apporte vraiment quelque chose ?
Les instructions à l’AI/IA (AI/IA tools ou outil local ?) risquent d’être un peu longues…