diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs
index fb4763ef..0e979aa4 100644
--- a/ChocolArm64/AOpCodeTable.cs
+++ b/ChocolArm64/AOpCodeTable.cs
@@ -371,16 +371,22 @@ namespace ChocolArm64
             SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", AInstEmit.Smin_V,        typeof(AOpCodeSimdReg));
             SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V,       typeof(AOpCodeSimdReg));
             SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V,       typeof(AOpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V,       typeof(AOpCodeSimdReg));
             SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V,       typeof(AOpCodeSimdReg));
+            SetA64("0x00111100>>>xxx100111xxxxxxxxxx", AInstEmit.Sqrshrn_V,     typeof(AOpCodeSimdShImm));
             SetA64("01011110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_S,       typeof(AOpCodeSimd));
             SetA64("0x001110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_V,       typeof(AOpCodeSimd));
             SetA64("01111110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_S,      typeof(AOpCodeSimd));
             SetA64("0x101110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_V,      typeof(AOpCodeSimd));
+            SetA64("0x00111100>>>xxx001001xxxxxxxxxx", AInstEmit.Srshr_V,       typeof(AOpCodeSimdShImm));
+            SetA64("0100111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_V,       typeof(AOpCodeSimdShImm));
             SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Sshl_V,        typeof(AOpCodeSimdReg));
             SetA64("0x00111100>>>xxx101001xxxxxxxxxx", AInstEmit.Sshll_V,       typeof(AOpCodeSimdShImm));
-            SetA64("010111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_S,        typeof(AOpCodeSimdShImm));
-            SetA64("0x0011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V,        typeof(AOpCodeSimdShImm));
-            SetA64("0x0011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V,        typeof(AOpCodeSimdShImm));
+            SetA64("0101111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_S,        typeof(AOpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V,        typeof(AOpCodeSimdShImm));
+            SetA64("0100111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_V,        typeof(AOpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V,        typeof(AOpCodeSimdShImm));
+            SetA64("0100111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_V,        typeof(AOpCodeSimdShImm));
             SetA64("0x00110000000000xxxxxxxxxxxxxxxx", AInstEmit.St__Vms,       typeof(AOpCodeSimdMemMs));
             SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", AInstEmit.St__Vms,       typeof(AOpCodeSimdMemMs));
             SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", AInstEmit.St__Vss,       typeof(AOpCodeSimdMemSs));
@@ -419,9 +425,11 @@ namespace ChocolArm64
             SetA64("0x101110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_V,       typeof(AOpCodeSimd));
             SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V,        typeof(AOpCodeSimdReg));
             SetA64("0x10111100>>>xxx101001xxxxxxxxxx", AInstEmit.Ushll_V,       typeof(AOpCodeSimdShImm));
-            SetA64("011111110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_S,        typeof(AOpCodeSimdShImm));
-            SetA64("0x1011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V,        typeof(AOpCodeSimdShImm));
-            SetA64("0x1011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V,        typeof(AOpCodeSimdShImm));
+            SetA64("0111111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_S,        typeof(AOpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V,        typeof(AOpCodeSimdShImm));
+            SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V,        typeof(AOpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V,        typeof(AOpCodeSimdShImm));
+            SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V,        typeof(AOpCodeSimdShImm));
             SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", AInstEmit.Uzp1_V,        typeof(AOpCodeSimdReg));
             SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", AInstEmit.Uzp2_V,        typeof(AOpCodeSimdReg));
             SetA64("0x001110<<100001001010xxxxxxxxxx", AInstEmit.Xtn_V,         typeof(AOpCodeSimd));
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index 39331f96..2fc8f178 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -65,11 +65,12 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
             EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);
 
-            for (int Index = 1; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 1; Index < Elems; Index++)
             {
                 EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
 
@@ -97,9 +98,10 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
 
@@ -190,84 +192,6 @@ namespace ChocolArm64.Instruction
             }
         }
 
-        private static void EmitSaturatingExtNarrow(AILEmitterCtx Context, bool SignedSrc, bool SignedDst, bool Scalar)
-        {
-            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
-
-            int Elems = (!Scalar ? 8 >> Op.Size : 1);
-            int ESize = 8 << Op.Size;
-
-            int Part = (!Scalar & (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0);
-
-            int TMaxValue = (SignedDst ? (1 << (ESize - 1)) - 1 : (int)((1L << ESize) - 1L));
-            int TMinValue = (SignedDst ? -((1 << (ESize - 1))) : 0);
-
-            Context.EmitLdc_I8(0L);
-            Context.EmitSttmp();
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                AILLabel LblLe    = new AILLabel();
-                AILLabel LblGeEnd = new AILLabel();
-
-                EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc);
-
-                Context.Emit(OpCodes.Dup);
-
-                Context.EmitLdc_I4(TMaxValue);
-                Context.Emit(OpCodes.Conv_U8);
-
-                Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe);
-
-                Context.Emit(OpCodes.Pop);
-
-                Context.EmitLdc_I4(TMaxValue);
-
-                Context.EmitLdc_I8(0x8000000L);
-                Context.EmitSttmp();
-
-                Context.Emit(OpCodes.Br_S, LblGeEnd);
-
-                Context.MarkLabel(LblLe);
-
-                Context.Emit(OpCodes.Dup);
-
-                Context.EmitLdc_I4(TMinValue);
-                Context.Emit(OpCodes.Conv_I8);
-
-                Context.Emit(SignedSrc ? OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd);
-
-                Context.Emit(OpCodes.Pop);
-
-                Context.EmitLdc_I4(TMinValue);
-
-                Context.EmitLdc_I8(0x8000000L);
-                Context.EmitSttmp();
-
-                Context.MarkLabel(LblGeEnd);
-
-                if (Scalar)
-                {
-                    EmitVectorZeroLower(Context, Op.Rd);
-                }
-
-                EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
-            }
-
-            if (Part == 0)
-            {
-                EmitVectorZeroUpper(Context, Op.Rd);
-            }
-
-            Context.EmitLdarg(ATranslatedSub.StateArgIdx);
-            Context.EmitLdarg(ATranslatedSub.StateArgIdx);
-            Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr));
-            Context.EmitLdtmp();
-            Context.Emit(OpCodes.Conv_I4);
-            Context.Emit(OpCodes.Or);
-            Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr));
-        }
-
         public static void Fabd_S(AILEmitterCtx Context)
         {
             EmitScalarBinaryOpF(Context, () =>
@@ -338,7 +262,7 @@ namespace ChocolArm64.Instruction
 
             int SizeF = Op.Size & 1;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             int Elems = Bytes >> SizeF + 2;
             int Half  = Elems >> 1;
@@ -870,7 +794,7 @@ namespace ChocolArm64.Instruction
 
             int SizeF = Op.Size & 1;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
             {
@@ -1102,6 +1026,15 @@ namespace ChocolArm64.Instruction
             });
         }
 
+        public static void Smlsl_V(AILEmitterCtx Context) // Signed widening multiply-subtract, emitted per element through the ternary widen helper.
+        {
+            EmitVectorWidenRnRmTernaryOpSx(Context, () => // presumably the helper pushes Rd, Rn, Rm in that order before this runs — confirm in the helper
+            {
+                Context.Emit(OpCodes.Mul); // multiply the two topmost stack values
+                Context.Emit(OpCodes.Sub); // subtract that product from the value beneath it
+            });
+        }
+
         public static void Smull_V(AILEmitterCtx Context)
         {
             EmitVectorWidenRnRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Mul));
@@ -1109,22 +1042,22 @@ namespace ChocolArm64.Instruction
 
         public static void Sqxtn_S(AILEmitterCtx Context)
         {
-            EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: true, Scalar: true);
+            EmitScalarSaturatingNarrowOpSxSx(Context, () => { }); // empty callback: no extra IL is emitted before saturation
         }
 
         public static void Sqxtn_V(AILEmitterCtx Context)
         {
-            EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: true, Scalar: false);
+            EmitVectorSaturatingNarrowOpSxSx(Context, () => { }); // empty callback: no extra IL is emitted before saturation
         }
 
         public static void Sqxtun_S(AILEmitterCtx Context)
         {
-            EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: true);
+            EmitScalarSaturatingNarrowOpSxZx(Context, () => { }); // empty callback: no extra IL is emitted before saturation
         }
 
         public static void Sqxtun_V(AILEmitterCtx Context)
         {
-            EmitSaturatingExtNarrow(Context, SignedSrc: true, SignedDst: false, Scalar: false);
+            EmitVectorSaturatingNarrowOpSxZx(Context, () => { }); // empty callback: no extra IL is emitted before saturation
         }
 
         public static void Sub_S(AILEmitterCtx Context)
@@ -1198,11 +1131,12 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
             EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);
 
-            for (int Index = 1; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 1; Index < Elems; Index++)
             {
                 EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
 
@@ -1272,12 +1206,12 @@ namespace ChocolArm64.Instruction
 
         public static void Uqxtn_S(AILEmitterCtx Context)
         {
-            EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: true);
+            EmitScalarSaturatingNarrowOpZxZx(Context, () => { }); // empty callback: no extra IL is emitted before saturation
         }
 
         public static void Uqxtn_V(AILEmitterCtx Context)
         {
-            EmitSaturatingExtNarrow(Context, SignedSrc: false, SignedDst: false, Scalar: false);
+            EmitVectorSaturatingNarrowOpZxZx(Context, () => { }); // empty callback: no extra IL is emitted before saturation
         }
     }
 }
diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
index 68a7ab88..773d9894 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
@@ -363,7 +363,7 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
             int Elems = (!Scalar ? Bytes >> Op.Size : 1);
 
             ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size));
@@ -407,7 +407,7 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
             int Elems = (!Scalar ? Bytes >> Op.Size : 1);
 
             ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size));
@@ -454,7 +454,7 @@ namespace ChocolArm64.Instruction
 
             int SizeF = Op.Size & 1;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
             {
diff --git a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
index da584743..7b355494 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
@@ -337,7 +337,7 @@ namespace ChocolArm64.Instruction
 
             int FBits = GetFBits(Context);
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             for (int Index = 0; Index < (Bytes >> SizeI); Index++)
             {
@@ -426,7 +426,7 @@ namespace ChocolArm64.Instruction
 
             int FBits = GetFBits(Context);
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             for (int Index = 0; Index < (Bytes >> SizeI); Index++)
             {
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index d895ec9c..1f7a2dad 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -3,6 +3,7 @@ using ChocolArm64.State;
 using ChocolArm64.Translation;
 using System;
 using System.Reflection;
+using System.Reflection.Emit;
 using System.Runtime.CompilerServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
@@ -417,7 +418,7 @@ namespace ChocolArm64.Instruction
 
             int SizeF = Op.Size & 1;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++)
             {
@@ -467,7 +468,7 @@ namespace ChocolArm64.Instruction
 
             int SizeF = Op.Size & 1;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++)
             {
@@ -527,9 +528,10 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 if (Opers.HasFlag(OperFlags.Rd))
                 {
@@ -582,9 +584,10 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 if (Ternary)
                 {
@@ -622,9 +625,10 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 if (Binary)
                 {
@@ -739,11 +743,11 @@ namespace ChocolArm64.Instruction
             EmitVectorPairwiseOp(Context, Emit, false);
         }
 
-        private static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed)
+        public static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed)
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             int Elems = Bytes >> Op.Size;
             int Half  = Elems >> 1;
@@ -769,6 +773,117 @@ namespace ChocolArm64.Instruction
             }
         }
 
+        public static void EmitScalarSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) // scalar form: signed source, signed destination range
+        {
+            EmitSaturatingNarrowOp(Context, Emit, SignedSrc: true, SignedDst: true, Scalar: true);
+        }
+
+        public static void EmitScalarSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) // scalar form: signed source, unsigned destination range
+        {
+            EmitSaturatingNarrowOp(Context, Emit, SignedSrc: true, SignedDst: false, Scalar: true);
+        }
+
+        public static void EmitScalarSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) // scalar form: unsigned source, unsigned destination range
+        {
+            EmitSaturatingNarrowOp(Context, Emit, SignedSrc: false, SignedDst: false, Scalar: true);
+        }
+
+        public static void EmitVectorSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) // vector form: signed source, signed destination range
+        {
+            EmitSaturatingNarrowOp(Context, Emit, SignedSrc: true, SignedDst: true, Scalar: false);
+        }
+
+        public static void EmitVectorSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) // vector form: signed source, unsigned destination range
+        {
+            EmitSaturatingNarrowOp(Context, Emit, SignedSrc: true, SignedDst: false, Scalar: false);
+        }
+
+        public static void EmitVectorSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) // vector form: unsigned source, unsigned destination range
+        {
+            EmitSaturatingNarrowOp(Context, Emit, SignedSrc: false, SignedDst: false, Scalar: false);
+        }
+
+        public static void EmitSaturatingNarrowOp( // Emits IL that clamps each wide source element of Rn into a half-width element of Rd.
+            AILEmitterCtx Context,   // emitter context; Context.CurrOp is cast to AOpCodeSimd
+            Action        Emit,      // emits extra IL applied to each extracted element before the range checks
+            bool          SignedSrc, // extract and compare source elements as signed values
+            bool          SignedDst, // clamp to the signed (true) or unsigned (false) destination range
+            bool          Scalar)    // process a single element instead of 8 >> Op.Size elements
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            int Elems = !Scalar ? 8 >> Op.Size : 1; // number of destination elements written
+            int ESize = 8 << Op.Size;               // destination element width in bits
+
+            int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0; // first destination index; non-zero only for 128-bit vector ops
+
+            long TMaxValue = SignedDst ? (1L << (ESize - 1)) - 1L : (1L << ESize) - 1L; // 64-bit math: "1 << 31" in int only worked through unchecked wraparound
+            long TMinValue = SignedDst ? -(1L << (ESize - 1)) : 0L;
+
+            Context.EmitLdc_I8(0L); // tmp collects the saturation flag; start cleared
+            Context.EmitSttmp();
+
+            for (int Index = 0; Index < Elems; Index++)
+            {
+                AILLabel LblLe    = new AILLabel();
+                AILLabel LblGeEnd = new AILLabel();
+
+                EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); // source element is one size step wider than the destination
+
+                Emit();
+
+                Context.Emit(OpCodes.Dup);
+
+                Context.EmitLdc_I8(TMaxValue);
+
+                Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe); // value <= max: skip the upper clamp
+
+                Context.Emit(OpCodes.Pop); // too large: replace the value with the maximum
+
+                Context.EmitLdc_I8(TMaxValue);
+                Context.EmitLdc_I8(0x8000000L); // bit 27 (FPSR.QC per the ARM architecture), stored to tmp to record saturation
+                Context.EmitSttmp();
+
+                Context.Emit(OpCodes.Br_S, LblGeEnd);
+
+                Context.MarkLabel(LblLe);
+
+                Context.Emit(OpCodes.Dup);
+
+                Context.EmitLdc_I8(TMinValue);
+
+                Context.Emit(SignedSrc ? OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd); // value >= min: already in range
+
+                Context.Emit(OpCodes.Pop); // too small: replace the value with the minimum
+
+                Context.EmitLdc_I8(TMinValue);
+                Context.EmitLdc_I8(0x8000000L);
+                Context.EmitSttmp();
+
+                Context.MarkLabel(LblGeEnd);
+
+                if (Scalar)
+                {
+                    EmitVectorZeroLower(Context, Op.Rd);
+                }
+
+                EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
+            }
+
+            if (Part == 0)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
+
+            Context.EmitLdarg(ATranslatedSub.StateArgIdx); // Fpsr |= (int)tmp on the thread state
+            Context.EmitLdarg(ATranslatedSub.StateArgIdx);
+            Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr));
+            Context.EmitLdtmp();
+            Context.Emit(OpCodes.Conv_I4);
+            Context.Emit(OpCodes.Or);
+            Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr));
+        }
+
         public static void EmitScalarSet(AILEmitterCtx Context, int Reg, int Size)
         {
             EmitVectorZeroAll(Context, Reg);
diff --git a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
index 8475a8a4..9f5af96c 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
@@ -55,7 +55,7 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
             int Elems = Bytes >> Op.Size;
 
             for (int Index = 0; Index < Elems; Index++)
@@ -195,7 +195,7 @@ namespace ChocolArm64.Instruction
                 throw new InvalidOperationException();
             }
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
             int Elems = Bytes >> Op.Size;
 
             int ContainerMask = (1 << (ContainerSize - Op.Size)) - 1;
diff --git a/ChocolArm64/Instruction/AInstEmitSimdMemory.cs b/ChocolArm64/Instruction/AInstEmitSimdMemory.cs
index d98ec012..368b014f 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdMemory.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdMemory.cs
@@ -105,13 +105,14 @@ namespace ChocolArm64.Instruction
                     throw new InvalidOperationException();
                 }
 
-                int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+                int Bytes = Op.GetBitsCount() >> 3;
+                int Elems = Bytes >> Op.Size;
 
                 for (int SElem = 0; SElem < Op.SElems; SElem++)
                 {
                     int Rt = (Op.Rt + SElem) & 0x1f;
 
-                    for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+                    for (int Index = 0; Index < Elems; Index++)
                     {
                         EmitMemAddress();
 
diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs
index d67946a9..739f01c6 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs
@@ -14,9 +14,10 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 Context.EmitLdintzr(Op.Rn);
 
@@ -42,9 +43,10 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 EmitVectorExtractZx(Context, Op.Rn, Op.DstIndex, Op.Size);
 
@@ -64,7 +66,7 @@ namespace ChocolArm64.Instruction
             Context.EmitLdvec(Op.Rd);
             Context.EmitStvectmp();
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             int Position = Op.Imm4;
 
@@ -329,7 +331,7 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             int Elems = Bytes >> Op.Size;
 
@@ -355,7 +357,7 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             int Elems = Bytes >> Op.Size;
             int Half  = Elems >> 1;
@@ -382,7 +384,7 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
 
             int Elems = Bytes >> Op.Size;
             int Half  = Elems >> 1;
diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs
index 24d35abe..6f6b5606 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs
@@ -27,9 +27,7 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            int Shift = Op.Imm - (8 << Op.Size);
-
-            EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift);
+            EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op));
         }
 
         public static void Shll_V(AILEmitterCtx Context)
@@ -45,22 +43,21 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            int Shift = (8 << (Op.Size + 1)) - Op.Imm;
-
-            EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), Shift);
+            EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), GetImmShr(Op));
         }
 
         public static void Sli_V(AILEmitterCtx Context)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
-            int Shift = Op.Imm - (8 << Op.Size);
+            int Shift = GetImmShl(Op);
 
-            ulong Mask = Shift != 0 ? ulong.MaxValue >> (64 - Shift) : 0;            
+            ulong Mask = Shift != 0 ? ulong.MaxValue >> (64 - Shift) : 0;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < Elems; Index++)
             {
                 EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
 
@@ -84,6 +81,39 @@ namespace ChocolArm64.Instruction
             }
         }
 
+        public static void Sqrshrn_V(AILEmitterCtx Context) // Rounded right shift by immediate followed by a signed saturating narrow.
+        {
+            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+
+            int Shift = GetImmShr(Op);
+
+            long RoundConst = 1L << (Shift - 1); // half of 2^Shift, added before shifting so the result rounds
+
+            Action Emit = () => // passed to the narrow helper, which invokes it on each extracted element
+            {
+                Context.EmitLdc_I8(RoundConst);
+
+                Context.Emit(OpCodes.Add); // NOTE(review): plain IL add; looks like it can wrap for 64-bit source elements near the Int64 limits — confirm
+
+                Context.EmitLdc_I4(Shift);
+
+                Context.Emit(OpCodes.Shr); // sign-preserving (arithmetic) right shift
+            };
+
+            EmitVectorSaturatingNarrowOpSxSx(Context, Emit);
+        }
+
+        public static void Srshr_V(AILEmitterCtx Context) // Signed right shift by immediate with rounding, applied per vector element.
+        {
+            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+
+            int Shift = GetImmShr(Op); // NOTE(review): GetImmShr is not shown here; if it can return 64, IL shr by a count >= the operand width is unspecified — confirm
+
+            long RoundConst = 1L << (Shift - 1); // rounding increment: half of 2^Shift
+
+            EmitVectorRoundShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift, RoundConst);
+        }
+
         public static void Sshl_V(AILEmitterCtx Context)
         {
             EmitVectorShl(Context, Signed: true);
@@ -93,9 +123,7 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            int Shift = Op.Imm - (8 << Op.Size);
-
-            EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), Shift);
+            EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op));
         }
 
         public static void Sshr_S(AILEmitterCtx Context)
@@ -115,24 +143,20 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            int Shift = (8 << (Op.Size + 1)) - Op.Imm;
-
-            EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift);
+            EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), GetImmShr(Op));
         }
 
         public static void Ssra_V(AILEmitterCtx Context)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            int Shift = (8 << (Op.Size + 1)) - Op.Imm;
-
             Action Emit = () =>
             {
                 Context.Emit(OpCodes.Shr);
                 Context.Emit(OpCodes.Add);
             };
 
-            EmitVectorShImmTernarySx(Context, Emit, Shift);
+            EmitVectorShImmTernarySx(Context, Emit, GetImmShr(Op));
         }
 
         public static void Ushl_V(AILEmitterCtx Context)
@@ -144,9 +168,7 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            int Shift = Op.Imm - (8 << Op.Size);
-
-            EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift);
+            EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op));
         }
 
         public static void Ushr_S(AILEmitterCtx Context)
@@ -251,28 +273,51 @@ namespace ChocolArm64.Instruction
             }
         }
 
+        [Flags]
+        private enum ShImmFlags // Option bits passed to EmitVectorShImmOp in place of separate bool arguments.
+        {
+            None = 0, // no option set
+
+            Signed  = 1 << 0, // extract source elements as signed
+            Ternary = 1 << 1, // presumably also loads the destination element as an extra operand — the branch body is not shown here
+            Rounded = 1 << 2, // add the rounding constant to each element before the shift immediate is pushed
+
+            SignedTernary = Signed | Ternary,
+            SignedRounded = Signed | Rounded
+        }
+
         private static void EmitVectorShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
         {
-            EmitVectorShImmOp(Context, Emit, Imm, false, true);
+            EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.Signed); // signed element extraction only
         }
 
         private static void EmitVectorShImmTernarySx(AILEmitterCtx Context, Action Emit, int Imm)
         {
-            EmitVectorShImmOp(Context, Emit, Imm, true, true);
+            EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedTernary); // Signed | Ternary
         }
 
         private static void EmitVectorShImmBinaryZx(AILEmitterCtx Context, Action Emit, int Imm)
         {
-            EmitVectorShImmOp(Context, Emit, Imm, false, false);
+            EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.None); // no flags: unsigned extraction, no rounding
         }
 
-        private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, bool Ternary, bool Signed)
+        private static void EmitVectorRoundShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm, long Rc) // Rc: rounding constant forwarded to EmitVectorShImmOp
+        {
+            EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedRounded, Rc); // Signed | Rounded
+        }
+
+        private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, ShImmFlags Flags, long Rc = 0)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+            int Bytes = Op.GetBitsCount() >> 3;
+            int Elems = Bytes >> Op.Size;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            bool Signed  = (Flags & ShImmFlags.Signed)  != 0;
+            bool Ternary = (Flags & ShImmFlags.Ternary) != 0;
+            bool Rounded = (Flags & ShImmFlags.Rounded) != 0;
+
+            for (int Index = 0; Index < Elems; Index++)
             {
                 if (Ternary)
                 {
@@ -281,6 +326,13 @@ namespace ChocolArm64.Instruction
 
                 EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed);
 
+                if (Rounded)
+                {
+                    Context.EmitLdc_I8(Rc);
+
+                    Context.Emit(OpCodes.Add);
+                }
+
                 Context.EmitLdc_I4(Imm);
 
                 Emit();