// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module builtin

// utf8_char_len returns the length in bytes of a UTF-8 encoded codepoint that starts with the byte `b`.
pub fn utf8_char_len(b u8) int {
	return int(((u32(0xe5000000) >> ((b >> 3) & 0x1e)) & 3) + 1)
}

// Convert utf32 to utf8
// utf32 == Codepoint
pub fn utf32_to_str(code u32) string {
	unsafe {
		mut buffer := malloc_noscan(5)
		res := utf32_to_str_no_malloc(code, mut buffer)
		if res.len == 0 {
			// the buffer was not used at all
			free(buffer)
		}
		return res
	}
}

@[manualfree; unsafe]
pub fn utf32_to_str_no_malloc(code u32, mut buf &u8) string {
	unsafe {
		len := utf32_decode_to_buffer(code, mut buf)
		if len == 0 {
			return ''
		}
		buf[len] = 0
		return tos(buf, len)
	}
}

@[manualfree; unsafe]
pub fn utf32_decode_to_buffer(code u32, mut buf &u8) int {
	unsafe {
		icode := int(code) // Prevents doing casts everywhere
		mut buffer := &u8(buf)
		if icode <= 127 {
			// 0x7F
			buffer[0] = u8(icode)
			return 1
		} else if icode <= 2047 {
			// 0x7FF
			buffer[0] = 192 | u8(icode >> 6) // 0xC0 - 110xxxxx
			buffer[1] = 128 | u8(icode & 63) // 0x80 - 0x3F - 10xxxxxx
			return 2
		} else if icode <= 65535 {
			// 0xFFFF
			buffer[0] = 224 | u8(icode >> 12) // 0xE0 - 1110xxxx
			buffer[1] = 128 | (u8(icode >> 6) & 63) // 0x80 - 0x3F - 10xxxxxx
			buffer[2] = 128 | u8(icode & 63) // 0x80 - 0x3F - 10xxxxxx
			return 3
		}
		// 0x10FFFF
		else if icode <= 1114111 {
			buffer[0] = 240 | u8(icode >> 18) // 0xF0 - 11110xxx
			buffer[1] = 128 | (u8(icode >> 12) & 63) // 0x80 - 0x3F - 10xxxxxx
			buffer[2] = 128 | (u8(icode >> 6) & 63) // 0x80 - 0x3F - 10xxxxxx
			buffer[3] = 128 | u8(icode & 63) // 0x80 - 0x3F - 10xxxxxx
			return 4
		}
	}
	return 0
}

// Convert utf8 to utf32
// the original implementation did not check for
// valid utf8 in the string, and could result in
// values greater than the utf32 spec
// it has been replaced by `utf8_to_utf32` which
// has an option return type.
//
// this function is left for backward compatibility
// it is used in vlib/builtin/string.v,
// and also in vlib/v/gen/c/cgen.v
pub fn (_rune string) utf32_code() int {
	if _rune.len > 4 {
		return 0
	}
	return int(impl_utf8_to_utf32(_rune.str, _rune.len))
}

// convert array of utf8 bytes to single utf32 value
// will error if more than 4 bytes are submitted
pub fn (_bytes []u8) utf8_to_utf32() !rune {
	if _bytes.len > 4 {
		return error('attempted to decode too many bytes, utf-8 is limited to four bytes maximum')
	}
	return impl_utf8_to_utf32(_bytes.data, _bytes.len)
}

@[direct_array_access]
fn impl_utf8_to_utf32(_bytes &u8, _bytes_len int) rune {
	if _bytes_len == 0 || _bytes_len > 4 {
		return 0
	}
	// return ASCII unchanged
	if _bytes_len == 1 {
		return rune(unsafe { _bytes[0] })
	}

	match _bytes_len {
		2 {
			b0 := rune(unsafe { _bytes[0] })
			b1 := rune(unsafe { _bytes[1] })
			return ((b0 & 0x1F) << 6) | (b1 & 0x3F)
		}
		3 {
			b0 := rune(unsafe { _bytes[0] })
			b1 := rune(unsafe { _bytes[1] })
			b2 := rune(unsafe { _bytes[2] })
			return ((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F)
		}
		4 {
			b0 := rune(unsafe { _bytes[0] })
			b1 := rune(unsafe { _bytes[1] })
			b2 := rune(unsafe { _bytes[2] })
			b3 := rune(unsafe { _bytes[3] })
			return ((b0 & 0x07) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F)
		}
		else {
			return 0
		}
	}
}

// Calculate string length for formatting, i.e. number of "characters"
// This is simplified implementation. if you need specification compliant width,
// use utf8.east_asian.display_width.
pub fn utf8_str_visible_length(s string) int {
	return utf8_grapheme_visible_length(s)
}

// string_to_ansi_not_null_terminated returns an ANSI version of the string `_str`.
// NOTE: This is most useful for converting a vstring to an ANSI string under Windows.
// NOTE: The ANSI string return is not null-terminated, then you can use `os.write_file_array` write an ANSI file.
pub fn string_to_ansi_not_null_terminated(_str string) []u8 {
	wstr := _str.to_wide()
	mut ansi := wide_to_ansi(wstr)
	if ansi.len > 0 {
		unsafe { ansi.len-- } // remove tailing zero
	}
	return ansi
}