convert_UTF: rewrite in C++

This allows us to namespace the symbols properly.

Bug: google-breakpad:725
Change-Id: Iea8052547eef6c0acb299c1995781735c6d8994f
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/1769236
Reviewed-by: Mark Mentovai <mark@chromium.org>
This commit is contained in:
Mike Frysinger 2019-08-03 12:12:40 -04:00
parent abfe08e789
commit db1cda2653
8 changed files with 53 additions and 44 deletions

View File

@ -170,7 +170,7 @@ src_client_linux_libbreakpad_client_a_SOURCES = \
src/client/minidump_file_writer-inl.h \
src/client/minidump_file_writer.cc \
src/client/minidump_file_writer.h \
src/common/convert_UTF.c \
src/common/convert_UTF.cc \
src/common/convert_UTF.h \
src/common/md5.cc \
src/common/md5.h \
@ -658,7 +658,7 @@ src_tools_mac_dump_syms_dump_syms_mac_LDADD= \
src_common_dumper_unittest_SOURCES = \
src/common/byte_cursor_unittest.cc \
src/common/convert_UTF.c \
src/common/convert_UTF.cc \
src/common/dwarf_cfi_to_module.cc \
src/common/dwarf_cfi_to_module_unittest.cc \
src/common/dwarf_cu_to_module.cc \

View File

@ -307,7 +307,7 @@ am__src_client_linux_libbreakpad_client_a_SOURCES_DIST = \
src/client/linux/minidump_writer/minidump_writer.cc \
src/client/minidump_file_writer-inl.h \
src/client/minidump_file_writer.cc \
src/client/minidump_file_writer.h src/common/convert_UTF.c \
src/client/minidump_file_writer.h src/common/convert_UTF.cc \
src/common/convert_UTF.h src/common/md5.cc src/common/md5.h \
src/common/string_conversion.cc src/common/string_conversion.h \
src/common/linux/elf_core_dump.cc src/common/linux/elfutils.cc \
@ -689,7 +689,7 @@ src_client_linux_linux_dumper_unittest_helper_LINK = $(CXXLD) \
$(src_client_linux_linux_dumper_unittest_helper_LDFLAGS) \
$(LDFLAGS) -o $@
am__src_common_dumper_unittest_SOURCES_DIST = \
src/common/byte_cursor_unittest.cc src/common/convert_UTF.c \
src/common/byte_cursor_unittest.cc src/common/convert_UTF.cc \
src/common/dwarf_cfi_to_module.cc \
src/common/dwarf_cfi_to_module_unittest.cc \
src/common/dwarf_cu_to_module.cc \
@ -2157,7 +2157,7 @@ CLEANFILES = $(am__append_12)
@LINUX_HOST_TRUE@ src/client/minidump_file_writer-inl.h \
@LINUX_HOST_TRUE@ src/client/minidump_file_writer.cc \
@LINUX_HOST_TRUE@ src/client/minidump_file_writer.h \
@LINUX_HOST_TRUE@ src/common/convert_UTF.c \
@LINUX_HOST_TRUE@ src/common/convert_UTF.cc \
@LINUX_HOST_TRUE@ src/common/convert_UTF.h src/common/md5.cc \
@LINUX_HOST_TRUE@ src/common/md5.h \
@LINUX_HOST_TRUE@ src/common/string_conversion.cc \
@ -2522,7 +2522,7 @@ TESTS = $(check_PROGRAMS) $(check_SCRIPTS)
@DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@src_common_dumper_unittest_SOURCES = \
@DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@ src/common/byte_cursor_unittest.cc \
@DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@ src/common/convert_UTF.c \
@DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@ src/common/convert_UTF.cc \
@DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@ src/common/dwarf_cfi_to_module.cc \
@DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@ src/common/dwarf_cfi_to_module_unittest.cc \
@DISABLE_TOOLS_FALSE@@LINUX_HOST_TRUE@ src/common/dwarf_cu_to_module.cc \
@ -5170,20 +5170,6 @@ src/common/android/src_client_linux_linux_client_unittest_shlib-breakpad_getcont
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
src/common/src_common_dumper_unittest-convert_UTF.o: src/common/convert_UTF.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT src/common/src_common_dumper_unittest-convert_UTF.o -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo -c -o src/common/src_common_dumper_unittest-convert_UTF.o `test -f 'src/common/convert_UTF.c' || echo '$(srcdir)/'`src/common/convert_UTF.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/common/convert_UTF.c' object='src/common/src_common_dumper_unittest-convert_UTF.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o src/common/src_common_dumper_unittest-convert_UTF.o `test -f 'src/common/convert_UTF.c' || echo '$(srcdir)/'`src/common/convert_UTF.c
src/common/src_common_dumper_unittest-convert_UTF.obj: src/common/convert_UTF.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT src/common/src_common_dumper_unittest-convert_UTF.obj -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo -c -o src/common/src_common_dumper_unittest-convert_UTF.obj `if test -f 'src/common/convert_UTF.c'; then $(CYGPATH_W) 'src/common/convert_UTF.c'; else $(CYGPATH_W) '$(srcdir)/src/common/convert_UTF.c'; fi`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/common/convert_UTF.c' object='src/common/src_common_dumper_unittest-convert_UTF.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o src/common/src_common_dumper_unittest-convert_UTF.obj `if test -f 'src/common/convert_UTF.c'; then $(CYGPATH_W) 'src/common/convert_UTF.c'; else $(CYGPATH_W) '$(srcdir)/src/common/convert_UTF.c'; fi`
.cc.o:
@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
@ -5662,6 +5648,20 @@ src/common/src_common_dumper_unittest-byte_cursor_unittest.obj: src/common/byte_
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o src/common/src_common_dumper_unittest-byte_cursor_unittest.obj `if test -f 'src/common/byte_cursor_unittest.cc'; then $(CYGPATH_W) 'src/common/byte_cursor_unittest.cc'; else $(CYGPATH_W) '$(srcdir)/src/common/byte_cursor_unittest.cc'; fi`
src/common/src_common_dumper_unittest-convert_UTF.o: src/common/convert_UTF.cc
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT src/common/src_common_dumper_unittest-convert_UTF.o -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo -c -o src/common/src_common_dumper_unittest-convert_UTF.o `test -f 'src/common/convert_UTF.cc' || echo '$(srcdir)/'`src/common/convert_UTF.cc
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/common/convert_UTF.cc' object='src/common/src_common_dumper_unittest-convert_UTF.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o src/common/src_common_dumper_unittest-convert_UTF.o `test -f 'src/common/convert_UTF.cc' || echo '$(srcdir)/'`src/common/convert_UTF.cc
src/common/src_common_dumper_unittest-convert_UTF.obj: src/common/convert_UTF.cc
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT src/common/src_common_dumper_unittest-convert_UTF.obj -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo -c -o src/common/src_common_dumper_unittest-convert_UTF.obj `if test -f 'src/common/convert_UTF.cc'; then $(CYGPATH_W) 'src/common/convert_UTF.cc'; else $(CYGPATH_W) '$(srcdir)/src/common/convert_UTF.cc'; fi`
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-convert_UTF.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/common/convert_UTF.cc' object='src/common/src_common_dumper_unittest-convert_UTF.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o src/common/src_common_dumper_unittest-convert_UTF.obj `if test -f 'src/common/convert_UTF.cc'; then $(CYGPATH_W) 'src/common/convert_UTF.cc'; else $(CYGPATH_W) '$(srcdir)/src/common/convert_UTF.cc'; fi`
src/common/src_common_dumper_unittest-dwarf_cfi_to_module.o: src/common/dwarf_cfi_to_module.cc
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(src_common_dumper_unittest_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT src/common/src_common_dumper_unittest-dwarf_cfi_to_module.o -MD -MP -MF src/common/$(DEPDIR)/src_common_dumper_unittest-dwarf_cfi_to_module.Tpo -c -o src/common/src_common_dumper_unittest-dwarf_cfi_to_module.o `test -f 'src/common/dwarf_cfi_to_module.cc' || echo '$(srcdir)/'`src/common/dwarf_cfi_to_module.cc
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/common/$(DEPDIR)/src_common_dumper_unittest-dwarf_cfi_to_module.Tpo src/common/$(DEPDIR)/src_common_dumper_unittest-dwarf_cfi_to_module.Po

View File

@ -82,7 +82,7 @@ LOCAL_SRC_FILES := \
src/client/linux/minidump_writer/minidump_writer.cc \
src/client/minidump_file_writer.cc \
src/common/android/breakpad_getcontext.S \
src/common/convert_UTF.c \
src/common/convert_UTF.cc \
src/common/md5.cc \
src/common/string_conversion.cc \
src/common/linux/elfutils.cc \
@ -100,4 +100,4 @@ LOCAL_EXPORT_LDLIBS := -llog
include $(BUILD_STATIC_LIBRARY)
# Done.
# Done.

View File

@ -30,7 +30,7 @@
// Author: waylonis@google.com (Dan Waylonis)
/*
g++ -I../ ../common/convert_UTF.c \
g++ -I../ ../common/convert_UTF.cc \
../common/string_conversion.cc \
minidump_file_writer.cc \
minidump_file_writer_unittest.cc \

View File

@ -40,13 +40,13 @@ BIN_DIR=.
THREAD_SRC=solaris_lwp.cc
SHARE_SRC=../../minidump_file_writer.cc\
../../../common/convert_UTF.cc\
../../../common/md5.cc\
../../../common/string_conversion.cc\
../../../common/solaris/file_id.cc\
minidump_generator.cc
HANDLER_SRC=exception_handler.cc\
../../../common/solaris/guid_creator.cc
SHARE_C_SRC=../../../common/convert_UTF.c
MINIDUMP_TEST_SRC=minidump_test.cc
EXCEPTION_TEST_SRC=exception_handler_test.cc
@ -54,11 +54,10 @@ EXCEPTION_TEST_SRC=exception_handler_test.cc
THREAD_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(THREAD_SRC))
SHARE_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(SHARE_SRC))
HANDLER_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o,$(HANDLER_SRC))
SHARE_C_OBJ=$(patsubst %.c,$(OBJ_DIR)/%.o,$(SHARE_C_SRC))
MINIDUMP_TEST_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o, $(MINIDUMP_TEST_SRC))\
$(THREAD_OBJ) $(SHARE_OBJ) $(SHARE_C_OBJ) $(HANDLER_OBJ)
$(THREAD_OBJ) $(SHARE_OBJ) $(HANDLER_OBJ)
EXCEPTION_TEST_OBJ=$(patsubst %.cc,$(OBJ_DIR)/%.o, $(EXCEPTION_TEST_SRC))\
$(THREAD_OBJ) $(SHARE_OBJ) $(SHARE_C_OBJ) $(HANDLER_OBJ)
$(THREAD_OBJ) $(SHARE_OBJ) $(HANDLER_OBJ)
BIN=$(BIN_DIR)/minidump_test\
$(BIN_DIR)/exception_handler_test

View File

@ -61,7 +61,7 @@
'android/ucontext_constants.h',
'basictypes.h',
'byte_cursor.h',
'convert_UTF.c',
'convert_UTF.cc',
'convert_UTF.h',
'dwarf/bytereader-inl.h',
'dwarf/bytereader.cc',

View File

@ -60,10 +60,16 @@ See the header file "ConvertUTF.h" for complete documentation.
#include <stdio.h>
#endif
static const int halfShift = 10; /* used for shifting by 10 bits */
namespace google_breakpad {
static const UTF32 halfBase = 0x0010000UL;
static const UTF32 halfMask = 0x3FFUL;
namespace {
const int halfShift = 10; /* used for shifting by 10 bits */
const UTF32 halfBase = 0x0010000UL;
const UTF32 halfMask = 0x3FFUL;
} // namespace
#define UNI_SUR_HIGH_START (UTF32)0xD800
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
@ -183,6 +189,8 @@ ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* so
/* --------------------------------------------------------------------- */
namespace {
/*
* Index into the table below with the first byte of a UTF-8 sequence to
* get the number of trailing bytes that are supposed to follow it.
@ -190,7 +198,7 @@ ConversionResult ConvertUTF16toUTF32 (const UTF16** sourceStart, const UTF16* so
* left as-is for anyone who may want to do such conversion, which was
* allowed in earlier algorithms.
*/
static const char trailingBytesForUTF8[256] = {
const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -206,7 +214,7 @@ static const char trailingBytesForUTF8[256] = {
* This table contains as many values as there might be trailing bytes
* in a UTF-8 sequence.
*/
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
@ -216,7 +224,7 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
* (I.e., one byte sequence, two byte... etc.). Remember that sequencs
* for *legal* UTF-8 will be 4 or fewer bytes total.
*/
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
/* --------------------------------------------------------------------- */
@ -228,6 +236,8 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
* into an inline function.
*/
} // namespace
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF16toUTF8 (const UTF16** sourceStart, const UTF16* sourceEnd,
@ -299,6 +309,8 @@ return result;
/* --------------------------------------------------------------------- */
namespace {
/*
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
* This must be called with the length pre-determined by the first byte.
@ -309,8 +321,7 @@ return result;
* If presented with a length > 4, this returns false. The Unicode
* definition of UTF-8 goes up to 4-byte sequences.
*/
static Boolean isLegalUTF8(const UTF8 *source, int length) {
Boolean isLegalUTF8(const UTF8 *source, int length) {
UTF8 a;
const UTF8 *srcptr = source+length;
switch (length) {
@ -335,6 +346,8 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
return true;
}
} // namespace
/* --------------------------------------------------------------------- */
/*
@ -552,3 +565,5 @@ In UTF-8 writing code, the switches on "bytesToWrite" are
similarly unrolled loops.
--------------------------------------------------------------------- */
} // namespace google_breakpad

View File

@ -106,6 +106,8 @@ All should be unsigned values to avoid sign extension during
bit mask & shift operations.
------------------------------------------------------------------------ */
namespace google_breakpad {
typedef unsigned long UTF32; /* at least 32 bits */
typedef unsigned short UTF16; /* at least 16 bits */
typedef unsigned char UTF8; /* typically 8 bits */
@ -130,11 +132,6 @@ typedef enum {
lenientConversion
} ConversionFlags;
/* This is for C++ and does no harm in C */
#ifdef __cplusplus
extern "C" {
#endif
ConversionResult ConvertUTF8toUTF16 (const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
@ -155,9 +152,7 @@ ConversionResult ConvertUTF32toUTF16 (const UTF32** sourceStart, const UTF32* so
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
#ifdef __cplusplus
}
#endif
} // namespace google_breakpad
/* --------------------------------------------------------------------- */