Author: Michael R. Crusoe <crusoe@debian.org>
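Description: Support non-x86 systems via libsimde-dev
 Guard the CPUID probing in cpu_info.cpp with x86 architecture checks and
 set only sse2=true on other architectures. Include SIMDe's x86/sse2.h
 (with native aliases enabled) in intr_copy.h instead of emmintrin.h and
 immintrin.h, and call simde_mm_stream_si64 for the streaming copies.
 On non-x86 targets always select RadulsSort::RadixSortMSD_SSE2 in kmc.h
 and hide the RadixSortMSD_SSE41 through RadixSortMSD_AVX2 declarations in
 raduls.h; raduls_impl.h now falls back to the RadixSortMSD_SSE2 name when
 no SSE/AVX macro is defined. In the makefile, build all four RADULS
 objects with their -m flags only when SIMD=1; otherwise build just
 raduls_sse2.o without -msse2. Store the splitter.cpp mapping tables as
 int instead of char so that their -1 entries do not depend on the
 signedness of plain char.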
--- kmc.orig/kmer_counter/cpu_info.cpp
+++ kmc/kmer_counter/cpu_info.cpp
@@ -38,6 +38,7 @@
 	string vendor, brand;
 	void cpuid(int *result, int function_id) const
 	{
+#if defined(__x86_64__) || defined(__i386__)
 #ifdef _MSC_VER
 		__cpuidex(result, function_id, 0);
 
@@ -53,10 +54,12 @@
 		__asm__("cpuid\n\t"
 			: "=a" (result[0]), "=b" (result[1]), "=c" (result[2]), "=d" (result[3]) : "0" (function_id), "c"(0));
 #endif  
+#endif
 	}
 
 	CpuInfoImpl()
 	{
+#if defined(__x86_64__) || defined(__i386__)
 		array<int, 4> cpui = { -1 };
 		cpuid(cpui.data(), 0);
 		int nIds_ = cpui[0];
@@ -90,6 +93,9 @@
 			std::bitset<32> EBX = data_[7][1];
 			avx2 = EBX[5];
 		}
+#else
+		sse2=true;
+#endif
 	}
 
 	const string& GetVendor() const
@@ -145,4 +151,4 @@
 bool CCpuInfo::AVX_Enabled() { return cpu_info_impl.avx; }
 bool CCpuInfo::AVX2_Enabled() { return cpu_info_impl.avx2; }
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- kmc.orig/kmer_counter/intr_copy.h
+++ kmc/kmer_counter/intr_copy.h
@@ -11,8 +11,8 @@
 #ifndef _INTR_COPY_H
 #define _INTR_COPY_H
 
-#include <emmintrin.h>
-#include <immintrin.h>
+#define SIMDE_ENABLE_NATIVE_ALIASES
+#include <simde/x86/sse2.h> 
 
 #ifndef WIN32
 typedef long long __int64;
@@ -27,7 +27,7 @@
 	__int64* src = (__int64 *)_src;
 
 	for (unsigned i = 0; i < size; ++i)
-		_mm_stream_si64(dest + i, src[i]);
+		simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]);
 }
 
 
@@ -41,7 +41,7 @@
 		__int64* src = (__int64*)_src;
 
 		for (unsigned i = 0; i < SIZE; ++i)
-			_mm_stream_si64(dest + i, src[i]);
+			simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, dest + i), src[i]);
 	}
 };
 
@@ -89,4 +89,4 @@
 
 #endif
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- kmc.orig/kmer_counter/kmc.h
+++ kmc/kmer_counter/kmc.h
@@ -1112,6 +1112,8 @@
 #ifdef __APPLE__
 	sort_func = RadixSort::RadixSortMSD<CKmer<SIZE>, SIZE>;
 	CSmallSort<SIZE>::Adjust(384);
+#elif !defined(__x86_64__) && !defined(__i386__)
+	sort_func = RadulsSort::RadixSortMSD_SSE2<CKmer<SIZE>>;
 #else	
 	auto proc_name = CCpuInfo::GetBrand();
 	bool is_intel = CCpuInfo::GetVendor() == "GenuineIntel";
--- kmc.orig/makefile
+++ kmc/makefile
@@ -30,11 +30,16 @@
 $(KMC_MAIN_DIR)/kmer.o \
 $(KMC_MAIN_DIR)/splitter.o \
 $(KMC_MAIN_DIR)/kb_collector.o
+ifeq (1,$(SIMD))
 RADULS_OBJS = \
 $(KMC_MAIN_DIR)/raduls_sse2.o \
 $(KMC_MAIN_DIR)/raduls_sse41.o \
 $(KMC_MAIN_DIR)/raduls_avx2.o \
 $(KMC_MAIN_DIR)/raduls_avx.o
+else
+RADULS_OBJS = \
+$(KMC_MAIN_DIR)/raduls_sse2.o
+endif
 
 KMC_DUMP_OBJS = \
 $(KMC_DUMP_DIR)/nc_utils.o \
@@ -66,6 +71,7 @@
 $(KMC_TOOLS_OBJS): %.o: %.cpp
 	$(CXX) $(CPPFLAGS) $(KMC_TOOLS_CXXFLAGS) -c $< -o $@
 
+ifeq (1,$(SIMD))
 $(KMC_MAIN_DIR)/raduls_sse2.o: $(KMC_MAIN_DIR)/raduls_sse2.cpp
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -msse2 -c $< -o $@
 $(KMC_MAIN_DIR)/raduls_sse41.o: $(KMC_MAIN_DIR)/raduls_sse41.cpp
@@ -74,6 +80,10 @@
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -mavx -c $< -o $@
 $(KMC_MAIN_DIR)/raduls_avx2.o: $(KMC_MAIN_DIR)/raduls_avx2.cpp
 	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -mavx2 -c $< -o $@
+else
+$(KMC_MAIN_DIR)/raduls_sse2.o: $(KMC_MAIN_DIR)/raduls_sse2.cpp
+	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@
+endif
 
 kmc: $(KMC_OBJS) $(RADULS_OBJS)
 	-mkdir -p $(KMC_BIN_DIR)
--- kmc.orig/kmer_counter/splitter.cpp
+++ kmc/kmer_counter/splitter.cpp
@@ -350,7 +350,7 @@
 				if (!both_strands && is_rev_comp) //if read is reversed and kmc was run to count all (not only canonical) kmers read must be transformed back
 				{
 					//static const char rev_maping[] = "=TGMCRSVAWYHKDBN";
-					static const char rev_maping[] = { -1, 3, 2, -1, 1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1 };// "=TGMCRSVAWYHKDBN";
+					static const int rev_maping[] = { -1, 3, 2, -1, 1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1 };// "=TGMCRSVAWYHKDBN";
 					uint32 n_bytes = l_seq / 2;
 					uint64_t pos_after = pos + l_seq;
 					pos = pos_after;
@@ -369,7 +369,7 @@
 				}
 				else
 				{
-					static const char maping[] = { -1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1 };//"=ACMGRSVTWYHKDBN";
+					static const int maping[] = { -1, 0, 1, -1, 2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1 };//"=ACMGRSVTWYHKDBN";
 					uint32 n_bytes = l_seq / 2;
 					for (uint32_t ii = 0; ii < n_bytes; ++ii)
 					{
@@ -956,4 +956,4 @@
 template class CWSmallKSplitter<uint32>;
 template class CWSmallKSplitter<uint64>;
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- kmc.orig/kmer_counter/raduls.h
+++ kmc/kmer_counter/raduls.h
@@ -23,7 +23,7 @@
 {
 	template<typename KMER_T>
 	void RadixSortMSD_SSE2(KMER_T* kmers, KMER_T* tmp, uint64 n_recs, uint32 byte, uint32 n_threads, CMemoryPool* pmm_radix_buf);
-
+#if defined(__x86_64__) || defined(__i386__)
 	template<typename KMER_T>
 	void RadixSortMSD_SSE41(KMER_T* kmers, KMER_T* tmp, uint64 n_recs, uint32 byte, uint32 n_threads, CMemoryPool* pmm_radix_buf);
 
@@ -32,8 +32,9 @@
 
 	template<typename KMER_T>
 	void RadixSortMSD_AVX2(KMER_T* kmers, KMER_T* tmp, uint64 n_recs, uint32 byte, uint32 n_threads, CMemoryPool* pmm_radix_buf);
+#endif
 }
 
 #endif // RADULS_H
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
--- kmc.orig/kmer_counter/raduls_impl.h
+++ kmc/kmer_counter/raduls_impl.h
@@ -730,7 +730,7 @@
 #define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_AVX
 #elif defined(__SSE4_1__)
 #define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_SSE41
-#elif defined(__SSE2__)
+#else
 #define RADULS_RADIX_SORT_FUNNAME RadixSortMSD_SSE2
 #endif
 
@@ -769,4 +769,4 @@
 
 #endif
 
-// ***** EOF
\ No newline at end of file
+// ***** EOF
