/*
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
 *
 * Sccsid @(#)_collelem.c	1.4 (gritter) 10/18/03
 */
/*  UNIX(R) Regular Expresssion Library
 *
 *  Note: Code is released under the GNU LGPL
 *
 *  Copyright (C) 2001 Caldera International, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to:
 *        Free Software Foundation, Inc.
 *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*	#include "synonyms.h"	*/
#include "colldata.h"
#include <limits.h>	/* UCHAR_MAX */
#include <stddef.h>	/* size_t, wchar_t */

/* View a raw table position as a single element or as a multi-entry record. */
#define CCE(p)	((const CollElem *)(p))
#define CCM(p)	((const CollMult *)(p))

/*
 * libuxre_collelem - look up the collation element for one wide character.
 *
 *	col	collation description; may be a null pointer.
 *	spare	caller-provided scratch element.  It is written to, and
 *		returned, only when wc falls inside a compressed range
 *		entry, so it must be valid whenever that can happen.
 *	wc	the character to look up.
 *
 * Returns:
 *	ELEM_ENCODED	if col is null, has CHF_ENCODED set, or has no
 *			main table;
 *	ELEM_BADCHAR	if no entry exists for wc (or a directly indexed
 *			entry has WGHT_SPECIAL as its first weight);
 *	otherwise a pointer to the element, which lives either in the
 *	main table or in *spare.
 */
LIBUXRE_STATIC const CollElem *
libuxre_collelem(struct lc_collate *col, CollElem *spare, wchar_t wc)
{
	const char *tbl;
	size_t hi, lo, cur, i;
	const CollMult *cmp;
	const CollElem *cep;
	wuchar_type uwc = (wuchar_type)wc;
	long diff;
	int sz;

	/*
	 * ELEM_ENCODED is returned when the collation is entirely
	 * based on the encoded value of the character.
	 */
	if (col == 0 || col->flags & CHF_ENCODED
		|| (tbl = (const char *)col->maintbl) == 0)
	{
		return ELEM_ENCODED;
	}
	/*
	 * Codes up to UCHAR_MAX are always found by direct indexing.
	 * With CHF_INDEXED set, larger codes are directly indexed as
	 * well, but only after being checked against nmain.
	 */
	if (uwc <= UCHAR_MAX || col->flags & CHF_INDEXED)
	{
		if (uwc > UCHAR_MAX && uwc >= col->nmain)
			return ELEM_BADCHAR;
		cep = CCE(&tbl[uwc * col->elemsize]);
		if (cep->weight[0] == WGHT_SPECIAL)
			return ELEM_BADCHAR;
		return cep;
	}
	/*
	 * Binary search for a match.  Could speed up the search if
	 * some interpolation was used, but keep it simple for now.
	 * Note that this is actually a table of CollMult's.
	 *
	 * To save space in the file, sequences of similar elements
	 * are sometimes compressed into a single CollMult that
	 * describes many entries.  This is denoted by a subnbeg
	 * with the SUBN_SPECIAL bit set.  The rest of the bits give
	 * the range covered by this entry.
	 */
	sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem));
	tbl += (1 + UCHAR_MAX) * col->elemsize;	/* skip the indexed part */
	lo = 0;
	hi = col->nmain - UCHAR_MAX;
	while (lo < hi)
	{
		/* Midpoint written so that it cannot overflow size_t. */
		cur = lo + (hi - lo) / 2;
		cmp = CCM(&tbl[cur * sz]);
		if ((diff = wc - cmp->ch) < 0)
		{
			hi = cur;
		}
		else if (cmp->elem.subnbeg & SUBN_SPECIAL)
		{
			if (diff > (long)(cmp->elem.subnbeg & ~SUBN_SPECIAL))
			{
				lo = cur + 1;	/* wc lies beyond this range */
				continue;
			}
			/*
			 * Create an entry from the sequence in spare:
			 * the first weight is offset by the position of
			 * wc within the range, and any further weight
			 * stored as WGHT_SPECIAL takes that same value.
			 */
			spare->multbeg = cmp->elem.multbeg;
			spare->subnbeg = 0;
			spare->weight[0] = cmp->elem.weight[0] + diff;
			for (i = 1; i < col->nweight; i++)
			{
				wuchar_type w;

				if ((w = cmp->elem.weight[i]) == WGHT_SPECIAL)
					w = spare->weight[0];
				spare->weight[i] = w;
			}
			return spare;
		}
		else if (diff == 0)
		{
			return &cmp->elem;
		}
		else
		{
			lo = cur + 1;
		}
	}
	return ELEM_BADCHAR;
}